Mercurial > repos > devteam > get_flanks
annotate get_flanks.py @ 3:2fdec558c935 draft
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author | devteam |
---|---|
date | Tue, 13 Oct 2015 12:50:28 -0400 |
parents | 0c66884f0cac |
children | dd9315a56c09 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 #Done by: Guru | |
3 | |
4 """ | |
5 Get Flanking regions. | |
6 | |
7 usage: %prog input out_file size direction region | |
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | |
9 -o, --off=N: Offset | |
10 """ | |
11 | |
3
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
12 import sys |
0 | 13 from bx.cookbook import doc_optparse |
3
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
14 from galaxy.tools.util.galaxyops import parse_cols_arg |
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
15 |
0 | 16 |
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` to standard error exactly as given (no newline is
    appended) and then exits with status 1.

    The exit status is explicitly non-zero: a bare ``sys.exit()`` exits with
    status 0, which tells a calling process the run succeeded even though an
    error was just reported.

    Raises:
        SystemExit: always, with ``code == 1``.
    """
    sys.stderr.write(msg)
    sys.exit(1)
20 | |
3
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
21 |
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
def _flank_intervals(start, end, strand, direction, region, size, offset):
    """Compute the flanking interval(s) of a single region.

    Args:
        start, end: integer coordinates of the input region.
        strand: '+' or '-'; any other value is treated like '-', so callers
            must validate it first.
        direction: 'Upstream', 'Downstream' or 'Both'.
        region: 'start' or 'end' select the edge the flank is anchored to;
            any other value falls through to the remaining branch of each
            direction (for 'Both', one flank on each side of the region).
        size: flank length.
        offset: shift applied to the anchor; added on '+' and subtracted
            on '-'.

    Returns:
        A list of ``(flank_start, flank_end)`` integer pairs in the order
        they must be written: one pair for 'Upstream'/'Downstream', two for
        'Both', and an empty list for an unrecognised ``direction``.
    """
    plus = strand == '+'

    if direction == 'Upstream':
        if plus:
            anchor = (end if region == 'end' else start) + offset
            return [(anchor - size, anchor)]
        anchor = (start if region == 'end' else end) - offset
        return [(anchor, anchor + size)]

    if direction == 'Downstream':
        if plus:
            anchor = (start if region == 'start' else end) + offset
            return [(anchor, anchor + size)]
        anchor = (end if region == 'start' else start) - offset
        return [(anchor - size, anchor)]

    if direction == 'Both':
        shift = offset if plus else -offset
        start_anchor = start + shift
        end_anchor = end + shift
        if plus:
            # 'start' / 'end' put both flanks around a single anchor.
            if region == 'start':
                end_anchor = start_anchor
            elif region == 'end':
                start_anchor = end_anchor
            return [(start_anchor - size, start_anchor),
                    (end_anchor, end_anchor + size)]
        # On '-', region 'start' is anchored at the end column and vice versa.
        if region == 'start':
            start_anchor = end_anchor
        elif region == 'end':
            end_anchor = start_anchor
        return [(end_anchor, end_anchor + size),
                (start_anchor - size, start_anchor)]

    return []


def main():
    """Command-line entry point: write flanking regions of every input line.

    Positional arguments (see the module docstring, which is also what
    ``doc_optparse`` parses): input file, output file, flank size, direction
    and region.  ``--cols`` gives the chrom/start/end/strand columns and
    ``--off`` the offset.

    For each non-blank, non-comment input line the start and end columns are
    replaced by the flank coordinates and the line is written to the output
    file (two output lines per input line when direction is 'Both').  Lines
    that cannot be processed are counted and skipped; a summary is printed
    to standard output at the end.

    Exits through ``stop_err`` when the arguments are invalid or when no
    line could be processed.
    """
    try:
        if int(sys.argv[3]) < 0:
            raise ValueError("negative flank length")
    except (IndexError, ValueError):
        stop_err("Length of flanking region(s) must be a non-negative integer.")

    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)
    try:
        # chr_col_1 is unpacked only because parse_cols_arg yields four values.
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols)
        inp_file, out_file, size, direction, region = args
    except Exception:
        stop_err("Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item.")
    try:
        offset = int(options.off)
        size = int(size)
    except Exception:
        stop_err("Invalid offset or length entered. Try again by entering valid integer values.")

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    data_lines = 0  # lines that are neither blank nor '#' comments

    # Context managers close both files even if processing fails part-way.
    with open(out_file, 'w') as fo:
        with open(inp_file) as fi:
            for i, line in enumerate(fi):
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                data_lines += 1
                try:
                    elems = line.split('\t')
                    # If the start and/or end columns are not numbers, skip
                    # that line.  int() itself raises ValueError for
                    # non-numeric text, so a coordinate of 0 is accepted.
                    start = int(elems[start_col_1])
                    end = int(elems[end_col_1])
                    if strand_col_1 != -1:
                        strand = elems[strand_col_1]
                        # If the strand value is not + or -, skip that line.
                        if strand not in ('+', '-'):
                            raise ValueError("invalid strand: %r" % strand)
                    else:
                        # -1 marks "no strand column"; default to +.
                        strand = '+'

                    flanks = _flank_intervals(start, end, strand, direction,
                                              region, size, offset)
                    for flank_start, flank_end in flanks:
                        # Flanks are checked one at a time, so with 'Both' the
                        # first flank may already be written when the second
                        # one is rejected and the line is counted as skipped.
                        if flank_start <= 0 or flank_end <= 0:
                            raise ValueError("flank has a non-positive coordinate")
                        elems[start_col_1] = str(flank_start)
                        elems[end_col_1] = str(flank_end)
                        fo.write("%s\n" % '\t'.join(elems))
                except (IndexError, ValueError, TypeError):
                    # Only data problems are treated as a skipped line; I/O
                    # errors propagate instead of being miscounted.
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line

    # Also true for an input without any data line (0 == 0).
    if skipped_lines == data_lines:
        stop_err("Data issue: click the pencil icon in the history item to correct the metadata attributes.")
    # print() with one pre-formatted string behaves the same on Python 2 and 3.
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%dL "%s"' % (skipped_lines, first_invalid_line, invalid_line))
    print('Location: %s, Region: %s, Flank-length: %d, Offset: %d ' % (direction, region, size, offset))
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
189 |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()