Mercurial > repos > devteam > get_flanks
comparison get_flanks.py @ 3:2fdec558c935 draft
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author | devteam |
---|---|
date | Tue, 13 Oct 2015 12:50:28 -0400 |
parents | 0c66884f0cac |
children | dd9315a56c09 |
comparison
equal
deleted
inserted
replaced
2:0eae46dcc35c | 3:2fdec558c935 |
---|---|
7 usage: %prog input out_file size direction region | 7 usage: %prog input out_file size direction region |
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | 8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file |
9 -o, --off=N: Offset | 9 -o, --off=N: Offset |
10 """ | 10 """ |
11 | 11 |
12 import sys, re, os | 12 import sys |
13 from bx.cookbook import doc_optparse | 13 from bx.cookbook import doc_optparse |
14 from galaxy.tools.util.galaxyops import * | 14 from galaxy.tools.util.galaxyops import parse_cols_arg |
15 | |
15 | 16 |
16 def stop_err( msg ): | 17 def stop_err( msg ): |
17 sys.stderr.write( msg ) | 18 sys.stderr.write( msg ) |
18 sys.exit() | 19 sys.exit() |
19 | 20 |
20 def main(): | 21 |
22 def main(): | |
21 try: | 23 try: |
22 if int( sys.argv[3] ) < 0: | 24 if int( sys.argv[3] ) < 0: |
23 raise Exception | 25 raise Exception |
24 except: | 26 except: |
25 stop_err( "Length of flanking region(s) must be a non-negative integer." ) | 27 stop_err( "Length of flanking region(s) must be a non-negative integer." ) |
28 options, args = doc_optparse.parse( __doc__ ) | 30 options, args = doc_optparse.parse( __doc__ ) |
29 try: | 31 try: |
30 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) | 32 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) |
31 inp_file, out_file, size, direction, region = args | 33 inp_file, out_file, size, direction, region = args |
32 if strand_col_1 <= 0: | 34 if strand_col_1 <= 0: |
33 strand = "+" #if strand is not defined, default it to + | 35 strand = "+" # if strand is not defined, default it to + |
34 except: | 36 except: |
35 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." ) | 37 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." ) |
36 try: | 38 try: |
37 offset = int(options.off) | 39 offset = int(options.off) |
38 size = int(size) | 40 size = int(size) |
39 except: | 41 except: |
40 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." ) | 42 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." ) |
41 | 43 |
42 fo = open(out_file,'w') | 44 fo = open(out_file, 'w') |
43 | 45 |
44 skipped_lines = 0 | 46 skipped_lines = 0 |
45 first_invalid_line = 0 | 47 first_invalid_line = 0 |
46 invalid_line = None | 48 invalid_line = None |
47 elems = [] | 49 elems = [] |
48 j=0 | 50 j = 0 |
49 for i, line in enumerate( file( inp_file ) ): | 51 for i, line in enumerate( file( inp_file ) ): |
50 line = line.strip() | 52 line = line.strip() |
51 if line and (not line.startswith( '#' )) and line != '': | 53 if line and (not line.startswith( '#' )) and line != '': |
52 j+=1 | 54 j += 1 |
53 try: | 55 try: |
54 elems = line.split('\t') | 56 elems = line.split('\t') |
55 #if the start and/or end columns are not numbers, skip that line. | 57 #if the start and/or end columns are not numbers, skip that line. |
56 assert int(elems[start_col_1]) | 58 assert int(elems[start_col_1]) |
57 assert int(elems[end_col_1]) | 59 assert int(elems[end_col_1]) |
74 else: | 76 else: |
75 elems[start_col_1] = str(int(elems[end_col_1]) - offset) | 77 elems[start_col_1] = str(int(elems[end_col_1]) - offset) |
76 elems[end_col_1] = str(int(elems[start_col_1]) + size) | 78 elems[end_col_1] = str(int(elems[start_col_1]) + size) |
77 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 79 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
78 fo.write( "%s\n" % '\t'.join( elems ) ) | 80 fo.write( "%s\n" % '\t'.join( elems ) ) |
79 | 81 |
80 elif direction == 'Downstream': | 82 elif direction == 'Downstream': |
81 if strand == '-': | 83 if strand == '-': |
82 if region == 'start': | 84 if region == 'start': |
83 elems[end_col_1] = str(int(elems[end_col_1]) - offset) | 85 elems[end_col_1] = str(int(elems[end_col_1]) - offset) |
84 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | 86 elems[start_col_1] = str( int(elems[end_col_1]) - size ) |
85 else: | 87 else: |
86 elems[end_col_1] = str(int(elems[start_col_1]) - offset) | 88 elems[end_col_1] = str(int(elems[start_col_1]) - offset) |
87 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | 89 elems[start_col_1] = str( int(elems[end_col_1]) - size ) |
88 elif strand == '+': | 90 elif strand == '+': |
89 if region == 'start': | 91 if region == 'start': |
90 elems[start_col_1] = str(int(elems[start_col_1]) + offset) | 92 elems[start_col_1] = str(int(elems[start_col_1]) + offset) |
91 elems[end_col_1] = str(int(elems[start_col_1]) + size) | 93 elems[end_col_1] = str(int(elems[start_col_1]) + size) |
92 else: | 94 else: |
93 elems[start_col_1] = str(int(elems[end_col_1]) + offset) | 95 elems[start_col_1] = str(int(elems[end_col_1]) + offset) |
94 elems[end_col_1] = str(int(elems[start_col_1]) + size) | 96 elems[end_col_1] = str(int(elems[start_col_1]) + size) |
95 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 97 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
96 fo.write( "%s\n" % '\t'.join( elems ) ) | 98 fo.write( "%s\n" % '\t'.join( elems ) ) |
97 | 99 |
98 elif direction == 'Both': | 100 elif direction == 'Both': |
99 if strand == '-': | 101 if strand == '-': |
100 if region == 'start': | 102 if region == 'start': |
101 start = str(int(elems[end_col_1]) - offset) | 103 start = str(int(elems[end_col_1]) - offset) |
102 end1 = str(int(start) + size) | 104 end1 = str(int(start) + size) |
103 end2 = str(int(start) - size) | 105 end2 = str(int(start) - size) |
104 elems[start_col_1]=start | 106 elems[start_col_1] = start |
105 elems[end_col_1]=end1 | 107 elems[end_col_1] = end1 |
106 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 108 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
107 fo.write( "%s\n" % '\t'.join( elems ) ) | 109 fo.write( "%s\n" % '\t'.join( elems ) ) |
108 elems[start_col_1]=end2 | 110 elems[start_col_1] = end2 |
109 elems[end_col_1]=start | 111 elems[end_col_1] = start |
110 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 112 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
111 fo.write( "%s\n" % '\t'.join( elems ) ) | 113 fo.write( "%s\n" % '\t'.join( elems ) ) |
112 elif region == 'end': | 114 elif region == 'end': |
113 start = str(int(elems[start_col_1]) - offset) | 115 start = str(int(elems[start_col_1]) - offset) |
114 end1 = str(int(start) + size) | 116 end1 = str(int(start) + size) |
115 end2 = str(int(start) - size) | 117 end2 = str(int(start) - size) |
116 elems[start_col_1]=start | 118 elems[start_col_1] = start |
117 elems[end_col_1]=end1 | 119 elems[end_col_1] = end1 |
118 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 120 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
119 fo.write( "%s\n" % '\t'.join( elems ) ) | 121 fo.write( "%s\n" % '\t'.join( elems ) ) |
120 elems[start_col_1]=end2 | 122 elems[start_col_1] = end2 |
121 elems[end_col_1]=start | 123 elems[end_col_1] = start |
122 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 124 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
123 fo.write( "%s\n" % '\t'.join( elems ) ) | 125 fo.write( "%s\n" % '\t'.join( elems ) ) |
124 else: | 126 else: |
125 start1 = str(int(elems[end_col_1]) - offset) | 127 start1 = str(int(elems[end_col_1]) - offset) |
126 end1 = str(int(start1) + size) | 128 end1 = str(int(start1) + size) |
127 start2 = str(int(elems[start_col_1]) - offset) | 129 start2 = str(int(elems[start_col_1]) - offset) |
128 end2 = str(int(start2) - size) | 130 end2 = str(int(start2) - size) |
129 elems[start_col_1]=start1 | 131 elems[start_col_1] = start1 |
130 elems[end_col_1]=end1 | 132 elems[end_col_1] = end1 |
131 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 133 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
132 fo.write( "%s\n" % '\t'.join( elems ) ) | 134 fo.write( "%s\n" % '\t'.join( elems ) ) |
133 elems[start_col_1]=end2 | 135 elems[start_col_1] = end2 |
134 elems[end_col_1]=start2 | 136 elems[end_col_1] = start2 |
135 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 137 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
136 fo.write( "%s\n" % '\t'.join( elems ) ) | 138 fo.write( "%s\n" % '\t'.join( elems ) ) |
137 elif strand == '+': | 139 elif strand == '+': |
138 if region == 'start': | 140 if region == 'start': |
139 start = str(int(elems[start_col_1]) + offset) | 141 start = str(int(elems[start_col_1]) + offset) |
140 end1 = str(int(start) - size) | 142 end1 = str(int(start) - size) |
141 end2 = str(int(start) + size) | 143 end2 = str(int(start) + size) |
142 elems[start_col_1]=end1 | 144 elems[start_col_1] = end1 |
143 elems[end_col_1]=start | 145 elems[end_col_1] = start |
144 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 146 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
145 fo.write( "%s\n" % '\t'.join( elems ) ) | 147 fo.write( "%s\n" % '\t'.join( elems ) ) |
146 elems[start_col_1]=start | 148 elems[start_col_1] = start |
147 elems[end_col_1]=end2 | 149 elems[end_col_1] = end2 |
148 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 150 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
149 fo.write( "%s\n" % '\t'.join( elems ) ) | 151 fo.write( "%s\n" % '\t'.join( elems ) ) |
150 elif region == 'end': | 152 elif region == 'end': |
151 start = str(int(elems[end_col_1]) + offset) | 153 start = str(int(elems[end_col_1]) + offset) |
152 end1 = str(int(start) - size) | 154 end1 = str(int(start) - size) |
153 end2 = str(int(start) + size) | 155 end2 = str(int(start) + size) |
154 elems[start_col_1]=end1 | 156 elems[start_col_1] = end1 |
155 elems[end_col_1]=start | 157 elems[end_col_1] = start |
156 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 158 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
157 fo.write( "%s\n" % '\t'.join( elems ) ) | 159 fo.write( "%s\n" % '\t'.join( elems ) ) |
158 elems[start_col_1]=start | 160 elems[start_col_1] = start |
159 elems[end_col_1]=end2 | 161 elems[end_col_1] = end2 |
160 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 162 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
161 fo.write( "%s\n" % '\t'.join( elems ) ) | 163 fo.write( "%s\n" % '\t'.join( elems ) ) |
162 else: | 164 else: |
163 start1 = str(int(elems[start_col_1]) + offset) | 165 start1 = str(int(elems[start_col_1]) + offset) |
164 end1 = str(int(start1) - size) | 166 end1 = str(int(start1) - size) |
165 start2 = str(int(elems[end_col_1]) + offset) | 167 start2 = str(int(elems[end_col_1]) + offset) |
166 end2 = str(int(start2) + size) | 168 end2 = str(int(start2) + size) |
167 elems[start_col_1]=end1 | 169 elems[start_col_1] = end1 |
168 elems[end_col_1]=start1 | 170 elems[end_col_1] = start1 |
169 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 171 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
170 fo.write( "%s\n" % '\t'.join( elems ) ) | 172 fo.write( "%s\n" % '\t'.join( elems ) ) |
171 elems[start_col_1]=start2 | 173 elems[start_col_1] = start2 |
172 elems[end_col_1]=end2 | 174 elems[end_col_1] = end2 |
173 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 175 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 |
174 fo.write( "%s\n" % '\t'.join( elems ) ) | 176 fo.write( "%s\n" % '\t'.join( elems ) ) |
175 except: | 177 except: |
176 skipped_lines += 1 | 178 skipped_lines += 1 |
177 if not invalid_line: | 179 if not invalid_line: |
181 | 183 |
182 if skipped_lines == j: | 184 if skipped_lines == j: |
183 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | 185 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) |
184 if skipped_lines > 0: | 186 if skipped_lines > 0: |
185 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) | 187 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) |
186 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset ) | 188 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' % ( direction, region, size, offset ) |
187 | 189 |
188 if __name__ == "__main__": | 190 if __name__ == "__main__": |
189 main() | 191 main() |