2
|
1 #!/usr/bin/env python
|
|
2
|
|
# Program description; passed to argparse as ``description`` and printed
# verbatim by --help (the parser uses RawDescriptionHelpFormatter).
tool_description = """
Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes.

A and G are converted to nucleotide code R. T, U and C are converted to Y. By
default output is written to stdout.

Example usage:
- write converted sequences from file in.fa to file out.fa:
convert_bc_to_binary_RY.py in.fa --fasta-format --outfile out.fa
"""

# Author/license footer; passed to argparse as ``epilog``.
epilog = """
Author: Daniel Maticzka
Copyright: 2015
License: Apache
Email: maticzkd@informatik.uni-freiburg.de
Status: Testing
"""
|
|
21
|
|
22 import argparse
|
|
23 import logging
|
|
24 from string import maketrans
|
|
25 from sys import stdout
|
|
26 from Bio import SeqIO
|
|
27 from Bio.Seq import Seq
|
|
28 from Bio.Alphabet import IUPAC
|
|
29
|
|
30 # # avoid ugly python IOError when stdout output is piped into another program
|
|
31 # # and then truncated (such as piping to head)
|
|
32 # from signal import signal, SIGPIPE, SIG_DFL
|
|
33 # signal(SIGPIPE, SIG_DFL)
|
|
34
|
|
# Command line interface definition. The raw formatter keeps the line breaks
# of the description and epilog texts exactly as written.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=tool_description,
    epilog=epilog,
)

# required input
parser.add_argument("infile", help="Path to fastq input file.")

# optional settings
parser.add_argument("-o", "--outfile", help="Write results to this file.")
parser.add_argument(
    "--fasta-format",
    action="store_true",
    dest="fasta_format",
    help="Read and write fasta instead of fastq format.",
)

# logging verbosity
parser.add_argument("-v", "--verbose", action="store_true", help="Be verbose.")
parser.add_argument(
    "-d", "--debug",
    action="store_true",
    help="Print lots of debugging information",
)

# program version
parser.add_argument('--version', action='version', version='0.1.0')
|
|
64
|
|
65
|
|
def translate_nt_to_RY(seq):
    """Translate nucleotides to the binary RY code (A,G -> R; C,U,T -> Y).

    Only the upper-case letters A, G, C, U and T are replaced; every other
    character is passed through unchanged.

    Args:
        seq: Nucleotide sequence as a plain string.

    Returns:
        The converted sequence as a string of the same length as ``seq``.

    >>> translate_nt_to_RY("ACGUTACGUT")
    'RYRYYRYRYY'
    """
    try:
        # Python 3: the translation table factory lives on str.
        make_table = str.maketrans
    except AttributeError:
        # Python 2: str has no maketrans, fall back to the string module.
        from string import maketrans as make_table
    trans_seq = seq.translate(make_table("AGCUT", "RRYYY"))
    # Lazy %-formatting: the message is only built if debug logging is on.
    logging.debug("%s -> %s", seq, trans_seq)
    return trans_seq
|
|
76
|
|
77
|
|
def translate_nt_to_RY_iterator(robj):
    """Yield the records of ``robj`` with their sequences converted to RY code.

    Each record is modified in place (its ``seq`` is replaced) and then
    yielded, so the input is consumed lazily, one record at a time.

    Args:
        robj: Iterable of SeqRecord objects, e.g. the result of SeqIO.parse.

    Yields:
        The same record objects, carrying the converted sequence and their
        original per-letter annotations.
    """
    for record in robj:
        # Detach the per-letter annotations (e.g. fastq qualities) while the
        # sequence is swapped and re-attach them afterwards.
        # NOTE(review): presumably needed because SeqRecord restricts
        # replacing seq while per-letter annotations are set - confirm.
        # This is done for every record instead of consulting the global
        # command line arguments: records without per-letter annotations
        # (fasta input) simply carry an empty mapping through.
        saved_letter_annotations = record.letter_annotations
        record.letter_annotations = {}
        # R and Y are IUPAC ambiguity codes, so label the converted sequence
        # with the ambiguous alphabet rather than the unambiguous one.
        record.seq = Seq(translate_nt_to_RY(str(record.seq)),
                         IUPAC.ambiguous_dna)
        record.letter_annotations = saved_letter_annotations
        yield record
|
|
89
|
|
# handle arguments
import sys

args = parser.parse_args()
if args.debug:
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
elif args.verbose:
    logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s")
else:
    logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s")
logging.info("Parsed arguments:")
# The outfile line used to be logged twice while the infile was never shown.
logging.info(" infile: '{}'".format(args.infile))
if args.outfile:
    logging.info(" outfile: enabled writing to file")
    logging.info(" outfile: '{}'".format(args.outfile))
logging.info("")

# Input and output always use the same format.
seq_format = "fasta" if args.fasta_format else "fastq"

# "rU" (universal newlines) is needed on Python 2 only; on Python 3 it is the
# default behaviour of text mode and the "U" flag is rejected by 3.11+.
read_mode = "rU" if sys.version_info[0] < 3 else "r"

# The input is opened first so that a missing input file does not create or
# truncate the output file.
with open(args.infile, read_mode) as input_handle:
    input_seq_iterator = SeqIO.parse(input_handle, seq_format)
    convert_seq_iterator = translate_nt_to_RY_iterator(input_seq_iterator)
    output_handle = (open(args.outfile, "w") if args.outfile is not None else stdout)
    try:
        SeqIO.write(convert_seq_iterator, output_handle, seq_format)
    finally:
        # Only close handles opened here; stdout is flushed, not closed.
        if output_handle is stdout:
            output_handle.flush()
        else:
            output_handle.close()
|