Mercurial > repos > devteam > flanking_features
annotate flanking_features.py @ 3:94248d5b9b8b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
author | devteam |
---|---|
date | Thu, 22 Jun 2017 18:39:31 -0400 |
parents | 850c05b9af00 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
2 # By: Guruprasad Ananda |
0 | 3 """ |
4 Fetch closest up/downstream interval from features corresponding to every interval in primary | |
5 | |
6 usage: %prog primary_file features_file out_file direction | |
7 -1, --cols1=N,N,N,N: Columns for start, end, strand in first file | |
8 -2, --cols2=N,N,N,N: Columns for start, end, strand in second file | |
9 -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval | |
10 -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval | |
11 """ | |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
12 from __future__ import print_function |
0 | 13 |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
14 import fileinput |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
15 import sys |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
16 |
0 | 17 from bx.cookbook import doc_optparse |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
18 from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper |
0 | 19 from bx.intervals.operations import quicksect |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
20 from bx.tabular.io import ParseError |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
21 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
22 |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
23 from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper |
0 | 24 |
25 assert sys.version_info[:2] >= ( 2, 4 ) | |
26 | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
27 |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
28 def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
29 # direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases |
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
30 # threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand |
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
31 # threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
32 if direction == 1: |
0 | 33 if node.maxend <= threshold_up: |
34 if node.end == node.maxend: | |
35 report_func_up(node) | |
36 elif node.right and node.left: | |
37 if node.right.maxend == node.maxend: | |
38 get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
39 elif node.left.maxend == node.maxend: | |
40 get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
41 elif node.right and node.right.maxend == node.maxend: | |
42 get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
43 elif node.left and node.left.maxend == node.maxend: | |
44 get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
45 elif node.minend <= threshold_up: | |
46 if node.end <= threshold_up: | |
47 report_func_up(node) | |
48 if node.left and node.right: | |
49 if node.right.minend <= threshold_up: | |
50 get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
51 if node.left.minend <= threshold_up: | |
52 get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
53 elif node.left: | |
54 if node.left.minend <= threshold_up: | |
55 get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
56 elif node.right: | |
57 if node.right.minend <= threshold_up: | |
58 get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
59 elif direction == 0: | |
60 if node.start > threshold_down: | |
61 report_func_down(node) | |
62 if node.left: | |
63 get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
64 else: | |
65 if node.right: | |
66 get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down) | |
67 | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
68 |
0 | 69 def proximal_region_finder(readers, region, comments=True): |
70 """ | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
71 Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ]. |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
72 Intervals are GenomicInterval objects. |
0 | 73 """ |
74 primary = readers[0] | |
75 features = readers[1] | |
76 either = False | |
77 if region == 'Upstream': | |
78 up, down = True, False | |
79 elif region == 'Downstream': | |
80 up, down = False, True | |
81 else: | |
82 up, down = True, True | |
83 if region == 'Either': | |
84 either = True | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
85 |
0 | 86 # Read features into memory: |
87 rightTree = quicksect.IntervalTree() | |
88 for item in features: | |
89 if type( item ) is GenomicInterval: | |
90 rightTree.insert( item, features.linenum, item ) | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
91 |
0 | 92 for interval in primary: |
93 if type( interval ) is Header: | |
94 yield interval | |
95 if type( interval ) is Comment and comments: | |
96 yield interval | |
97 elif type( interval ) == GenomicInterval: | |
98 chrom = interval.chrom | |
99 start = int(interval.start) | |
100 end = int(interval.end) | |
101 strand = interval.strand | |
102 if chrom not in rightTree.chroms: | |
103 continue | |
104 else: | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
105 root = rightTree.chroms[chrom] # root node for the chrom tree |
0 | 106 result_up = [] |
107 result_down = [] | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
108 if (strand == '+' and up) or (strand == '-' and down): |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
109 # upstream +ve strand and downstream -ve strand cases |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
110 get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
111 |
0 | 112 if (strand == '+' and down) or (strand == '-' and up): |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
113 # downstream +ve strand and upstream -ve strand case |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
114 get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
115 |
0 | 116 if result_up: |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
117 if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval. |
0 | 118 ends = [] |
119 for n in result_up: | |
120 ends.append(n.end) | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
121 res_ind = ends.index(max(ends)) # fetch the index of the closest interval i.e. the interval with the max end from the results_up list |
0 | 122 else: |
123 res_ind = 0 | |
124 if not(either): | |
125 yield [ interval, result_up[res_ind].other ] | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
126 |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
127 if result_down: |
0 | 128 if not(either): |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
129 # The last element of result_down will be the closest element to the given interval |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
130 yield [ interval, result_down[-1].other ] |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
131 |
0 | 132 if either and (result_up or result_down): |
133 iter_val = [] | |
134 if result_up and result_down: | |
135 if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)): | |
136 iter_val = [ interval, result_up[res_ind].other ] | |
137 else: | |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
138 # The last element of result_down will be the closest element to the given interval |
0 | 139 iter_val = [ interval, result_down[-1].other ] |
140 elif result_up: | |
141 iter_val = [ interval, result_up[res_ind].other ] | |
142 elif result_down: | |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
143 # The last element of result_down will be the closest element to the given interval |
0 | 144 iter_val = [ interval, result_down[-1].other ] |
145 yield iter_val | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
146 |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
147 |
0 | 148 def main(): |
149 options, args = doc_optparse.parse( __doc__ ) | |
150 try: | |
151 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 ) | |
152 chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 ) | |
153 in1_gff_format = bool( options.gff1 ) | |
154 in2_gff_format = bool( options.gff2 ) | |
155 in_fname, in2_fname, out_fname, direction = args | |
156 except: | |
157 doc_optparse.exception() | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
158 |
0 | 159 # Set readers to handle either GFF or default format. |
160 if in1_gff_format: | |
161 in1_reader_wrapper = GFFIntervalToBEDReaderWrapper | |
162 else: | |
163 in1_reader_wrapper = NiceReaderWrapper | |
164 if in2_gff_format: | |
165 in2_reader_wrapper = GFFIntervalToBEDReaderWrapper | |
166 else: | |
167 in2_reader_wrapper = NiceReaderWrapper | |
168 | |
169 g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ), | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
170 chrom_col=chr_col_1, |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
171 start_col=start_col_1, |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
172 end_col=end_col_1, |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
173 strand_col=strand_col_1, |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
174 fix_strand=True ) |
0 | 175 g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ), |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
176 chrom_col=chr_col_2, |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
177 start_col=start_col_2, |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
178 end_col=end_col_2, |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
179 strand_col=strand_col_2, |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
180 fix_strand=True ) |
0 | 181 |
182 # Find flanking features. | |
183 out_file = open( out_fname, "w" ) | |
184 try: | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
185 for result in proximal_region_finder([g1, g2], direction): |
0 | 186 if type( result ) is list: |
187 line, closest_feature = result | |
188 # Need to join outputs differently depending on file types. | |
189 if in1_gff_format: | |
190 # Output is GFF with added attribute 'closest feature.' | |
191 | |
192 # Invervals are in BED coordinates; need to convert to GFF. | |
193 line = convert_bed_coords_to_gff( line ) | |
194 closest_feature = convert_bed_coords_to_gff( closest_feature ) | |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
195 |
0 | 196 # Replace double quotes with single quotes in closest feature's attributes. |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
197 out_file.write( "%s closest_feature \"%s\" \n" % |
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
198 ( "\t".join( line.fields ), |
0 | 199 "\t".join( closest_feature.fields ).replace( "\"", "\\\"" ) |
1
850c05b9af00
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
200 ) ) |
0 | 201 else: |
202 # Output is BED + closest feature fields. | |
203 output_line_fields = [] | |
204 output_line_fields.extend( line.fields ) | |
205 output_line_fields.extend( closest_feature.fields ) | |
206 out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) ) | |
207 else: | |
208 out_file.write( "%s\n" % result ) | |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
209 except ParseError as exc: |
0 | 210 fail( "Invalid file format: %s" % str( exc ) ) |
211 | |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
212 print("Direction: %s" % (direction)) |
0 | 213 if g1.skipped > 0: |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
214 print(skipped( g1, filedesc=" of 1st dataset" )) |
0 | 215 if g2.skipped > 0: |
3
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
216 print(skipped( g2, filedesc=" of 2nd dataset" )) |
94248d5b9b8b
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
1
diff
changeset
|
217 |
0 | 218 |
219 if __name__ == "__main__": | |
220 main() |