comparison convert_bc_to_binary_RY.py @ 2:de4ea3aa1090 draft

Uploaded
author rnateam
date Thu, 22 Oct 2015 10:26:45 -0400
parents
children 17ef0e0dae68
comparison
equal deleted inserted replaced
1:ae0f58d3318f 2:de4ea3aa1090
1 #!/usr/bin/env python
2
# Help text handed to argparse as the parser description; it is printed
# verbatim because the parser uses RawDescriptionHelpFormatter.
tool_description = """
Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes.

A and G are converted to nucleotide code R. T, U and C are converted to Y. By
default output is written to stdout.

Example usage:
- write converted sequences from file in.fa to file out.fa:
convert_bc_to_binary_RY.py in.fa --outfile out.fa
"""

# Authorship and licensing footer handed to argparse as the parser epilog.
epilog = """
Author: Daniel Maticzka
Copyright: 2015
License: Apache
Email: maticzkd@informatik.uni-freiburg.de
Status: Testing
"""
21
22 import argparse
23 import logging
24 from string import maketrans
25 from sys import stdout
26 from Bio import SeqIO
27 from Bio.Seq import Seq
28 from Bio.Alphabet import IUPAC
29
30 # # avoid ugly python IOError when stdout output is piped into another program
31 # # and then truncated (such as piping to head)
32 # from signal import signal, SIGPIPE, SIG_DFL
33 # signal(SIGPIPE, SIG_DFL)
34
# Command line interface: one required input path plus optional switches.
# The raw formatter keeps the line breaks of the description and epilog.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=tool_description,
    epilog=epilog,
)

# positional arguments
parser.add_argument("infile", help="Path to fasta input file.")

# optional arguments
parser.add_argument("-o", "--outfile", help="Write results to this file.")
parser.add_argument("-v", "--verbose", action="store_true",
                    help="Be verbose.")
parser.add_argument("-d", "--debug", action="store_true",
                    help="Print lots of debugging information")
parser.add_argument("--version", action="version", version="0.1.0")
59
60
def translate_nt_to_RY(seq):
    """Translate nucleotides to the binary RY code (A,G -> R; C,U,T -> Y).

    Only the upper-case letters A, G, C, U and T are replaced; every other
    character (lower-case letters, N, gaps, ...) is passed through unchanged.

    Args:
        seq: Nucleotide sequence as a plain string.

    Returns:
        A string of the same length with purines replaced by R and
        pyrimidines replaced by Y.

    >>> translate_nt_to_RY("ACGUTACGUT")
    'RYRYYRYRYY'
    """
    # Python 3 exposes maketrans on str; Python 2 only has string.maketrans.
    make_table = getattr(str, "maketrans", None)
    if make_table is None:
        from string import maketrans as make_table
    trans_table = make_table("AGCUT", "RRYYY")
    trans_seq = seq.translate(trans_table)
    # Lazy %-formatting: the message is only built when debug logging is on.
    logging.debug("%s -> %s", seq, trans_seq)
    return trans_seq
71
72
def translate_nt_to_RY_iterator(robj):
    """Lazily convert the sequences of SeqRecords to the RY alphabet.

    Each record is modified in place (its ``seq`` attribute is replaced) and
    then yielded, so all other record fields are kept as they are.

    Args:
        robj: Iterable of records exposing a ``seq`` attribute, e.g. the
            iterator returned by ``SeqIO.parse``.

    Yields:
        The same record objects with their sequence translated by
        ``translate_nt_to_RY``.
    """
    for record in robj:
        # R and Y are IUPAC ambiguity codes, so the converted sequence is
        # tagged with the ambiguous DNA alphabet rather than the unambiguous
        # one, which does not contain these letters.
        record.seq = Seq(translate_nt_to_RY(str(record.seq)),
                         IUPAC.ambiguous_dna)
        yield record
79
# handle arguments
args = parser.parse_args()

# Configure logging verbosity: --debug takes precedence over --verbose;
# without either flag the logging module's default level applies.
if args.debug:
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
elif args.verbose:
    logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s")
else:
    logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s")

# Report the effective arguments. The original logged the outfile twice and
# never mentioned the input file.
logging.info("Parsed arguments:")
logging.info(" infile: '%s'", args.infile)
if args.outfile:
    logging.info(" outfile: enabled writing to file")
    logging.info(" outfile: '%s'", args.outfile)
logging.info("")

# Universal-newline mode must be requested explicitly on Python 2 ("rU");
# on Python 3 it is the text-mode default and the "U" flag is rejected by
# recent versions. `str is bytes` is only true on Python 2.
read_mode = "rU" if str is bytes else "r"

# Stream records from input to output; the context managers make sure the
# files are closed even if parsing or writing fails.
with open(args.infile, read_mode) as input_handle:
    input_seq_iterator = SeqIO.parse(input_handle, "fasta")
    convert_seq_iterator = translate_nt_to_RY_iterator(input_seq_iterator)
    if args.outfile is not None:
        with open(args.outfile, "w") as output_handle:
            SeqIO.write(convert_seq_iterator, output_handle, "fasta")
    else:
        # stdout is not ours to close; flush so piped consumers see the data.
        SeqIO.write(convert_seq_iterator, stdout, "fasta")
        stdout.flush()