comparison get_flanks.py @ 3:2fdec558c935 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author devteam
date Tue, 13 Oct 2015 12:50:28 -0400
parents 0c66884f0cac
children dd9315a56c09
comparison
equal deleted inserted replaced
2:0eae46dcc35c 3:2fdec558c935
7 usage: %prog input out_file size direction region 7 usage: %prog input out_file size direction region
8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file 8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
9 -o, --off=N: Offset 9 -o, --off=N: Offset
10 """ 10 """
11 11
12 import sys, re, os 12 import sys
13 from bx.cookbook import doc_optparse 13 from bx.cookbook import doc_optparse
14 from galaxy.tools.util.galaxyops import * 14 from galaxy.tools.util.galaxyops import parse_cols_arg
15
15 16
16 def stop_err( msg ): 17 def stop_err( msg ):
17 sys.stderr.write( msg ) 18 sys.stderr.write( msg )
18 sys.exit() 19 sys.exit()
19 20
20 def main(): 21
22 def main():
21 try: 23 try:
22 if int( sys.argv[3] ) < 0: 24 if int( sys.argv[3] ) < 0:
23 raise Exception 25 raise Exception
24 except: 26 except:
25 stop_err( "Length of flanking region(s) must be a non-negative integer." ) 27 stop_err( "Length of flanking region(s) must be a non-negative integer." )
28 options, args = doc_optparse.parse( __doc__ ) 30 options, args = doc_optparse.parse( __doc__ )
29 try: 31 try:
30 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) 32 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
31 inp_file, out_file, size, direction, region = args 33 inp_file, out_file, size, direction, region = args
32 if strand_col_1 <= 0: 34 if strand_col_1 <= 0:
33 strand = "+" #if strand is not defined, default it to + 35 strand = "+" # if strand is not defined, default it to +
34 except: 36 except:
35 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." ) 37 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." )
36 try: 38 try:
37 offset = int(options.off) 39 offset = int(options.off)
38 size = int(size) 40 size = int(size)
39 except: 41 except:
40 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." ) 42 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." )
41 43
42 fo = open(out_file,'w') 44 fo = open(out_file, 'w')
43 45
44 skipped_lines = 0 46 skipped_lines = 0
45 first_invalid_line = 0 47 first_invalid_line = 0
46 invalid_line = None 48 invalid_line = None
47 elems = [] 49 elems = []
48 j=0 50 j = 0
49 for i, line in enumerate( file( inp_file ) ): 51 for i, line in enumerate( file( inp_file ) ):
50 line = line.strip() 52 line = line.strip()
51 if line and (not line.startswith( '#' )) and line != '': 53 if line and (not line.startswith( '#' )) and line != '':
52 j+=1 54 j += 1
53 try: 55 try:
54 elems = line.split('\t') 56 elems = line.split('\t')
55 #if the start and/or end columns are not numbers, skip that line. 57 #if the start and/or end columns are not numbers, skip that line.
56 assert int(elems[start_col_1]) 58 assert int(elems[start_col_1])
57 assert int(elems[end_col_1]) 59 assert int(elems[end_col_1])
74 else: 76 else:
75 elems[start_col_1] = str(int(elems[end_col_1]) - offset) 77 elems[start_col_1] = str(int(elems[end_col_1]) - offset)
76 elems[end_col_1] = str(int(elems[start_col_1]) + size) 78 elems[end_col_1] = str(int(elems[start_col_1]) + size)
77 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 79 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
78 fo.write( "%s\n" % '\t'.join( elems ) ) 80 fo.write( "%s\n" % '\t'.join( elems ) )
79 81
80 elif direction == 'Downstream': 82 elif direction == 'Downstream':
81 if strand == '-': 83 if strand == '-':
82 if region == 'start': 84 if region == 'start':
83 elems[end_col_1] = str(int(elems[end_col_1]) - offset) 85 elems[end_col_1] = str(int(elems[end_col_1]) - offset)
84 elems[start_col_1] = str( int(elems[end_col_1]) - size ) 86 elems[start_col_1] = str( int(elems[end_col_1]) - size )
85 else: 87 else:
86 elems[end_col_1] = str(int(elems[start_col_1]) - offset) 88 elems[end_col_1] = str(int(elems[start_col_1]) - offset)
87 elems[start_col_1] = str( int(elems[end_col_1]) - size ) 89 elems[start_col_1] = str( int(elems[end_col_1]) - size )
88 elif strand == '+': 90 elif strand == '+':
89 if region == 'start': 91 if region == 'start':
90 elems[start_col_1] = str(int(elems[start_col_1]) + offset) 92 elems[start_col_1] = str(int(elems[start_col_1]) + offset)
91 elems[end_col_1] = str(int(elems[start_col_1]) + size) 93 elems[end_col_1] = str(int(elems[start_col_1]) + size)
92 else: 94 else:
93 elems[start_col_1] = str(int(elems[end_col_1]) + offset) 95 elems[start_col_1] = str(int(elems[end_col_1]) + offset)
94 elems[end_col_1] = str(int(elems[start_col_1]) + size) 96 elems[end_col_1] = str(int(elems[start_col_1]) + size)
95 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 97 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
96 fo.write( "%s\n" % '\t'.join( elems ) ) 98 fo.write( "%s\n" % '\t'.join( elems ) )
97 99
98 elif direction == 'Both': 100 elif direction == 'Both':
99 if strand == '-': 101 if strand == '-':
100 if region == 'start': 102 if region == 'start':
101 start = str(int(elems[end_col_1]) - offset) 103 start = str(int(elems[end_col_1]) - offset)
102 end1 = str(int(start) + size) 104 end1 = str(int(start) + size)
103 end2 = str(int(start) - size) 105 end2 = str(int(start) - size)
104 elems[start_col_1]=start 106 elems[start_col_1] = start
105 elems[end_col_1]=end1 107 elems[end_col_1] = end1
106 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 108 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
107 fo.write( "%s\n" % '\t'.join( elems ) ) 109 fo.write( "%s\n" % '\t'.join( elems ) )
108 elems[start_col_1]=end2 110 elems[start_col_1] = end2
109 elems[end_col_1]=start 111 elems[end_col_1] = start
110 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 112 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
111 fo.write( "%s\n" % '\t'.join( elems ) ) 113 fo.write( "%s\n" % '\t'.join( elems ) )
112 elif region == 'end': 114 elif region == 'end':
113 start = str(int(elems[start_col_1]) - offset) 115 start = str(int(elems[start_col_1]) - offset)
114 end1 = str(int(start) + size) 116 end1 = str(int(start) + size)
115 end2 = str(int(start) - size) 117 end2 = str(int(start) - size)
116 elems[start_col_1]=start 118 elems[start_col_1] = start
117 elems[end_col_1]=end1 119 elems[end_col_1] = end1
118 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 120 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
119 fo.write( "%s\n" % '\t'.join( elems ) ) 121 fo.write( "%s\n" % '\t'.join( elems ) )
120 elems[start_col_1]=end2 122 elems[start_col_1] = end2
121 elems[end_col_1]=start 123 elems[end_col_1] = start
122 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 124 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
123 fo.write( "%s\n" % '\t'.join( elems ) ) 125 fo.write( "%s\n" % '\t'.join( elems ) )
124 else: 126 else:
125 start1 = str(int(elems[end_col_1]) - offset) 127 start1 = str(int(elems[end_col_1]) - offset)
126 end1 = str(int(start1) + size) 128 end1 = str(int(start1) + size)
127 start2 = str(int(elems[start_col_1]) - offset) 129 start2 = str(int(elems[start_col_1]) - offset)
128 end2 = str(int(start2) - size) 130 end2 = str(int(start2) - size)
129 elems[start_col_1]=start1 131 elems[start_col_1] = start1
130 elems[end_col_1]=end1 132 elems[end_col_1] = end1
131 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 133 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
132 fo.write( "%s\n" % '\t'.join( elems ) ) 134 fo.write( "%s\n" % '\t'.join( elems ) )
133 elems[start_col_1]=end2 135 elems[start_col_1] = end2
134 elems[end_col_1]=start2 136 elems[end_col_1] = start2
135 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 137 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
136 fo.write( "%s\n" % '\t'.join( elems ) ) 138 fo.write( "%s\n" % '\t'.join( elems ) )
137 elif strand == '+': 139 elif strand == '+':
138 if region == 'start': 140 if region == 'start':
139 start = str(int(elems[start_col_1]) + offset) 141 start = str(int(elems[start_col_1]) + offset)
140 end1 = str(int(start) - size) 142 end1 = str(int(start) - size)
141 end2 = str(int(start) + size) 143 end2 = str(int(start) + size)
142 elems[start_col_1]=end1 144 elems[start_col_1] = end1
143 elems[end_col_1]=start 145 elems[end_col_1] = start
144 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 146 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
145 fo.write( "%s\n" % '\t'.join( elems ) ) 147 fo.write( "%s\n" % '\t'.join( elems ) )
146 elems[start_col_1]=start 148 elems[start_col_1] = start
147 elems[end_col_1]=end2 149 elems[end_col_1] = end2
148 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 150 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
149 fo.write( "%s\n" % '\t'.join( elems ) ) 151 fo.write( "%s\n" % '\t'.join( elems ) )
150 elif region == 'end': 152 elif region == 'end':
151 start = str(int(elems[end_col_1]) + offset) 153 start = str(int(elems[end_col_1]) + offset)
152 end1 = str(int(start) - size) 154 end1 = str(int(start) - size)
153 end2 = str(int(start) + size) 155 end2 = str(int(start) + size)
154 elems[start_col_1]=end1 156 elems[start_col_1] = end1
155 elems[end_col_1]=start 157 elems[end_col_1] = start
156 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 158 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
157 fo.write( "%s\n" % '\t'.join( elems ) ) 159 fo.write( "%s\n" % '\t'.join( elems ) )
158 elems[start_col_1]=start 160 elems[start_col_1] = start
159 elems[end_col_1]=end2 161 elems[end_col_1] = end2
160 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 162 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
161 fo.write( "%s\n" % '\t'.join( elems ) ) 163 fo.write( "%s\n" % '\t'.join( elems ) )
162 else: 164 else:
163 start1 = str(int(elems[start_col_1]) + offset) 165 start1 = str(int(elems[start_col_1]) + offset)
164 end1 = str(int(start1) - size) 166 end1 = str(int(start1) - size)
165 start2 = str(int(elems[end_col_1]) + offset) 167 start2 = str(int(elems[end_col_1]) + offset)
166 end2 = str(int(start2) + size) 168 end2 = str(int(start2) + size)
167 elems[start_col_1]=end1 169 elems[start_col_1] = end1
168 elems[end_col_1]=start1 170 elems[end_col_1] = start1
169 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 171 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
170 fo.write( "%s\n" % '\t'.join( elems ) ) 172 fo.write( "%s\n" % '\t'.join( elems ) )
171 elems[start_col_1]=start2 173 elems[start_col_1] = start2
172 elems[end_col_1]=end2 174 elems[end_col_1] = end2
173 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 175 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0
174 fo.write( "%s\n" % '\t'.join( elems ) ) 176 fo.write( "%s\n" % '\t'.join( elems ) )
175 except: 177 except:
176 skipped_lines += 1 178 skipped_lines += 1
177 if not invalid_line: 179 if not invalid_line:
181 183
182 if skipped_lines == j: 184 if skipped_lines == j:
183 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) 185 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
184 if skipped_lines > 0: 186 if skipped_lines > 0:
185 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) 187 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
186 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset ) 188 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' % ( direction, region, size, offset )
187 189
188 if __name__ == "__main__": 190 if __name__ == "__main__":
189 main() 191 main()