annotate coords2clnt.py @ 58:bbbae1ee87e0 draft default tip

fix for flexbar with small data issue
author rnateam
date Tue, 16 Feb 2016 10:08:58 -0500
parents 0b9aab6aaebf
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
1 #!/usr/bin/env python
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
2
50
0b9aab6aaebf Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
rnateam
parents: 2
diff changeset
3 import argparse
0b9aab6aaebf Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
rnateam
parents: 2
diff changeset
4 import logging
0b9aab6aaebf Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
rnateam
parents: 2
diff changeset
5 from sys import stdout
0b9aab6aaebf Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
rnateam
parents: 2
diff changeset
6 from pybedtools import BedTool
0b9aab6aaebf Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
rnateam
parents: 2
diff changeset
7 from pybedtools.featurefuncs import five_prime
0b9aab6aaebf Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
rnateam
parents: 2
diff changeset
# Restore the default SIGPIPE disposition so that a downstream consumer closing
# the pipe early (e.g. `coords2clnt.py in.bed | head`) ends this script quietly
# instead of surfacing a Python IOError on the next write to stdout.
try:
    from signal import signal, SIGPIPE, SIG_DFL
except ImportError:
    # SIGPIPE is not available on every platform (e.g. Windows); there is
    # nothing to reset in that case.
    pass
else:
    signal(SIGPIPE, SIG_DFL)
0b9aab6aaebf Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
rnateam
parents: 2
diff changeset
12
2
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
# Description shown at the top of `--help`. It is printed verbatim because the
# parser is built with RawDescriptionHelpFormatter.
tool_description = """
Given coordinates of the aligned reads, calculate positions of the crosslinked
nucleotides. Crosslinked nts are assumed to be one nt upstream of the 5'-end of
the read.

By default output is written to stdout.

Input:
* bed6 file containing coordinates of aligned reads

Output:
* bed6 file containing coordinates of crosslinking events

Example usage:
- convert read coordinates from file in.bed to coordinates of the crosslinking
events, written to out.bed:
coords2clnt.py in.bed --outfile out.bed
"""

# Author and licensing footer shown at the bottom of `--help`.
epilog = """
Author: Daniel Maticzka
Copyright: 2015
License: Apache
Email: maticzkd@informatik.uni-freiburg.de
Status: Testing
"""
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
37
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
# Build the command line interface: one positional input path plus optional
# output, verbosity and version switches.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=tool_description,
    epilog=epilog,
)
# required: the bed file to read
parser.add_argument("infile", help="Path to bed input file.")
# optional: destination file and logging switches
parser.add_argument("-o", "--outfile", help="Write results to this file.")
parser.add_argument("-v", "--verbose", action="store_true", help="Be verbose.")
parser.add_argument(
    "-d", "--debug",
    action="store_true",
    help="Print lots of debugging information",
)
parser.add_argument("--version", action="version", version="0.1.0")
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
62
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
63
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
# handle arguments
args = parser.parse_args()

# Configure the root logger from the verbosity switches: --debug takes
# precedence over --verbose and additionally timestamps every record; with
# neither switch the logging module's default threshold applies.
log_format = "%(filename)s - %(levelname)s - %(message)s"
if args.debug:
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - " + log_format)
elif args.verbose:
    logging.basicConfig(level=logging.INFO, format=log_format)
else:
    logging.basicConfig(format=log_format)

# Echo the parsed arguments (emitted at INFO level, so only visible with
# --verbose or --debug). The input path is reported once and the output path
# only when one was given.
logging.info("Parsed arguments:")
logging.info(" infile: '%s'", args.infile)
if args.outfile:
    logging.info(" outfile: enabled writing to file")
    logging.info(" outfile: '%s'", args.outfile)
logging.info("")
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
78
de4ea3aa1090 Uploaded
rnateam
parents:
diff changeset
# data processing
# Map every aligned read to the position one nt upstream of its 5'-end, which
# is where the crosslinked nucleotide is assumed to be.
alns = BedTool(args.infile)
clnts = alns.each(five_prime, upstream=1, downstream=0)

# write to file or to stdout
if args.outfile:
    clnts.saveas(args.outfile)
else:
    # Without a target name the result is saved to a temporary file, which is
    # then streamed to stdout.
    tmptool = clnts.saveas()
    logging.debug("results written to temporary file :%s", tmptool.fn)
    # The context manager closes the handle even if a write to stdout fails.
    with open(tmptool.fn) as tmp:
        stdout.writelines(tmp)