comparison windowSplitter.py @ 3:1011f028f09d draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
author devteam
date Wed, 03 Jun 2020 13:44:45 +0000
parents 4815516900d4
children
comparison
equal deleted inserted replaced
2:1fb384fb950e 3:1011f028f09d
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2
3 """ 2 """
4 Split into windows. 3 Split into windows.
5 4
6 usage: %prog input size out_file 5 usage: %prog input size out_file
7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file 6 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
8 """ 7 """
8 from __future__ import print_function
9 9
10 import sys 10 import sys
11 11
12 from galaxy import eggs
13 import pkg_resources
14 pkg_resources.require( "bx-python" )
15 from bx.cookbook import doc_optparse 12 from bx.cookbook import doc_optparse
16 from galaxy.tools.util.galaxyops import *
17 13
18 def stop_err( msg ): 14
19 sys.stderr.write( msg ) 15 # Default chrom, start, end, strand cols for a bed file
20 sys.exit() 16 BED_DEFAULT_COLS = 0, 1, 2, 5
17
18
19 def parse_cols_arg(cols):
20 """Parse a columns command line argument into a four-tuple"""
21 if cols:
22 # Handle case where no strand column included - in this case, cols
23 # looks something like 1,2,3,
24 if cols.endswith(','):
25 cols += '0'
26 col_list = [int(x) - 1 for x in cols.split(",")]
27 return col_list
28 else:
29 return BED_DEFAULT_COLS
21 30
22 31
23 def main(): 32 def main():
24 # Parsing Command Line here 33 # Parsing Command Line here
25 options, args = doc_optparse.parse( __doc__ ) 34 options, args = doc_optparse.parse(__doc__)
26 35
27 try: 36 try:
28 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) 37 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols)
29 inp_file, winsize, out_file, makesliding, offset = args 38 inp_file, winsize, out_file, makesliding, offset = args
30 winsize = int(winsize) 39 winsize = int(winsize)
31 offset = int(offset) 40 offset = int(offset)
32 makesliding = int(makesliding) 41 makesliding = int(makesliding)
33 except: 42 except Exception:
34 stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." ) 43 sys.exit("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")
35
36 fo = open(out_file,'w')
37 44
38 skipped_lines = 0 45 skipped_lines = 0
39 first_invalid_line = 0 46 first_invalid_line = 0
40 invalid_line = None 47 invalid_line = None
41 if offset == 0: 48 if offset == 0:
42 makesliding = 0 49 makesliding = 0
43 50
44 for i, line in enumerate( file( inp_file ) ): 51 with open(out_file, 'w') as fo, open(inp_file) as fi:
45 line = line.strip() 52 for i, line in enumerate(fi):
46 if line and line[0:1] != "#": 53 line = line.strip()
47 try: 54 if line and line[0:1] != "#":
48 elems = line.split('\t') 55 try:
49 start = int(elems[start_col_1]) 56 elems = line.split('\t')
50 end = int(elems[end_col_1]) 57 start = int(elems[start_col_1])
51 if makesliding == 0: 58 end = int(elems[end_col_1])
52 numwin = (end - start)/winsize 59 if makesliding == 0:
53 else: 60 numwin = (end - start) // winsize
54 numwin = (end - start)/offset 61 else:
55 if numwin > 0: 62 numwin = (end - start) // offset
56 for win in range(numwin): 63 if numwin > 0:
57 elems_1 = elems 64 for _ in range(numwin):
58 elems_1[start_col_1] = str(start) 65 elems_1 = elems
59 elems_1[end_col_1] = str(start + winsize) 66 elems_1[start_col_1] = str(start)
60 fo.write( "%s\n" % '\t'.join( elems_1 ) ) 67 elems_1[end_col_1] = str(start + winsize)
61 if makesliding == 0: 68 fo.write("%s\n" % '\t'.join(elems_1))
62 start = start + winsize 69 if makesliding == 0:
63 else: 70 start = start + winsize
64 start = start + offset 71 else:
65 if start+winsize > end: 72 start = start + offset
66 break 73 if start + winsize > end:
67 except: 74 break
68 skipped_lines += 1 75 except Exception:
69 if not invalid_line: 76 skipped_lines += 1
70 first_invalid_line = i + 1 77 if not invalid_line:
71 invalid_line = line 78 first_invalid_line = i + 1
72 79 invalid_line = line
73 fo.close()
74 80
75 if makesliding == 1: 81 if makesliding == 1:
76 print 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) 82 print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset))
77 else: 83 else:
78 print 'Window size=%d, Sliding=No' % (winsize) 84 print('Window size=%d, Sliding=No' % (winsize))
79 if skipped_lines > 0: 85 if skipped_lines > 0:
80 print 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) 86 print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
81 87
82 88
83 if __name__ == "__main__": 89 if __name__ == "__main__":
84 main() 90 main()