Mercurial > repos > rnateam > bctools
comparison convert_bc_to_binary_RY.py @ 2:de4ea3aa1090 draft
Uploaded
author | rnateam |
---|---|
date | Thu, 22 Oct 2015 10:26:45 -0400 |
parents | |
children | 17ef0e0dae68 |
comparison
equal
deleted
inserted
replaced
1:ae0f58d3318f | 2:de4ea3aa1090 |
---|---|
1 #!/usr/bin/env python | |
2 | |
# Help text shown by argparse as the program description (printed verbatim
# because the parser uses RawDescriptionHelpFormatter).
tool_description = """
Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes.

A and G are converted to nucleotide code R. T, U and C are converted to Y. By
default output is written to stdout.

Example usage:
- write converted sequences from file in.fa to file out.fa:
convert_bc_to_binary_RY.py in.fa --outfile out.fa
"""

# Author and license information appended after the argument help.
epilog = """
Author: Daniel Maticzka
Copyright: 2015
License: Apache
Email: maticzkd@informatik.uni-freiburg.de
Status: Testing
"""
21 | |
22 import argparse | |
23 import logging | |
24 from string import maketrans | |
25 from sys import stdout | |
26 from Bio import SeqIO | |
27 from Bio.Seq import Seq | |
28 from Bio.Alphabet import IUPAC | |
29 | |
30 # # avoid ugly python IOError when stdout output is piped into another program | |
31 # # and then truncated (such as piping to head) | |
32 # from signal import signal, SIGPIPE, SIG_DFL | |
33 # signal(SIGPIPE, SIG_DFL) | |
34 | |
# Command line interface. The description and epilog are printed verbatim
# (RawDescriptionHelpFormatter) so the line breaks of the help texts survive.
parser = argparse.ArgumentParser(
    description=tool_description,
    epilog=epilog,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# Required positional argument: the FASTA file to convert.
parser.add_argument("infile", help="Path to fasta input file.")

# Optional arguments: output destination, log verbosity and version display.
parser.add_argument("-o", "--outfile", help="Write results to this file.")
parser.add_argument("-v", "--verbose", action="store_true", help="Be verbose.")
parser.add_argument(
    "-d", "--debug",
    action="store_true",
    help="Print lots of debugging information",
)
parser.add_argument("--version", action="version", version="0.1.0")
59 | |
60 | |
def translate_nt_to_RY(seq):
    """Translate nucleotides to the binary RY code (A,G -> R; C,U,T -> Y).

    Only the upper-case letters A, G, C, U and T are replaced; every other
    character (lower-case letters, N, gaps, ...) is copied through unchanged.

    Args:
        seq: nucleotide sequence as a plain string.

    Returns:
        A string of the same length as ``seq`` with the replacements applied.

    >>> translate_nt_to_RY("ACGUTACGUT")
    'RYRYYRYRYY'
    """
    # An explicit mapping is used instead of string.maketrans(), which exists
    # only on Python 2; this form behaves the same on Python 2 and Python 3.
    nt_to_ry = {"A": "R", "G": "R", "C": "Y", "U": "Y", "T": "Y"}
    trans_seq = "".join(nt_to_ry.get(nt, nt) for nt in seq)
    # Lazy %-formatting: the message is only built when DEBUG is enabled.
    logging.debug("%s -> %s", seq, trans_seq)
    return trans_seq
71 | |
72 | |
def translate_nt_to_RY_iterator(robj):
    """Lazily yield the records of ``robj`` with RY-translated sequences.

    Each record is modified in place: its ``seq`` attribute is replaced by a
    new ``Seq`` holding the output of ``translate_nt_to_RY``. The same record
    object is then yielded, one at a time, in input order.
    """
    for rec in robj:
        ry_string = translate_nt_to_RY(str(rec.seq))
        # NOTE(review): the result consists of R/Y codes, yet the alphabet
        # passed here is IUPAC.unambiguous_dna - confirm whether
        # an ambiguous alphabet was intended.
        rec.seq = Seq(ry_string, IUPAC.unambiguous_dna)
        yield rec
79 | |
# handle arguments
args = parser.parse_args()

# Configure logging: --debug takes precedence over --verbose; without either
# flag the logging module's default level applies.
if args.debug:
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
elif args.verbose:
    logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s")
else:
    logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s")

# Report the parsed arguments (each one once).
logging.info("Parsed arguments:")
logging.info(" infile: '{}'".format(args.infile))
if args.outfile:
    logging.info(" outfile: enabled writing to file")
    logging.info(" outfile: '{}'".format(args.outfile))
logging.info("")

# The "U" (universal newlines) flag is needed on Python 2 only; Python 3
# translates newlines by default and rejects "rU" from version 3.11 on.
import sys
read_mode = "r" if sys.version_info[0] >= 3 else "rU"

# Stream records from the input through the RY translation to the output.
# Context managers make sure the files are closed even if parsing or writing
# fails.
with open(args.infile, read_mode) as input_handle:
    input_seq_iterator = SeqIO.parse(input_handle, "fasta")
    convert_seq_iterator = translate_nt_to_RY_iterator(input_seq_iterator)
    if args.outfile is not None:
        with open(args.outfile, "w") as output_handle:
            SeqIO.write(convert_seq_iterator, output_handle, "fasta")
    else:
        # stdout belongs to the interpreter and is deliberately not closed.
        SeqIO.write(convert_seq_iterator, stdout, "fasta")