Mercurial > repos > rnateam > bctools
comparison convert_bc_to_binary_RY.py @ 2:de4ea3aa1090 draft
Uploaded
| author | rnateam |
|---|---|
| date | Thu, 22 Oct 2015 10:26:45 -0400 |
| parents | |
| children | 17ef0e0dae68 |
comparison
equal
deleted
inserted
replaced
| 1:ae0f58d3318f | 2:de4ea3aa1090 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
# Text shown by argparse as the description (top) of the --help output.
tool_description = """
Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes.

A and G are converted to nucleotide code R. T, U and C are converted to Y. By
default output is written to stdout.

Example usage:
- write converted sequences from file in.fa to file out.fa:
convert_bc_to_binary_RY.py in.fa --outfile out.fa
"""

# Text shown by argparse as the epilog (bottom) of the --help output.
epilog = """
Author: Daniel Maticzka
Copyright: 2015
License: Apache
Email: maticzkd@informatik.uni-freiburg.de
Status: Testing
"""
| 21 | |
| 22 import argparse | |
| 23 import logging | |
| 24 from string import maketrans | |
| 25 from sys import stdout | |
| 26 from Bio import SeqIO | |
| 27 from Bio.Seq import Seq | |
| 28 from Bio.Alphabet import IUPAC | |
| 29 | |
| 30 # # avoid ugly python IOError when stdout output is piped into another program | |
| 31 # # and then truncated (such as piping to head) | |
| 32 # from signal import signal, SIGPIPE, SIG_DFL | |
| 33 # signal(SIGPIPE, SIG_DFL) | |
| 34 | |
# build the command line interface; the raw formatter keeps the line breaks
# of the description and epilog texts as written
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=tool_description,
    epilog=epilog)

# positional: the fasta file to convert
parser.add_argument("infile", help="Path to fasta input file.")

# optional: output destination (stdout is used when omitted)
parser.add_argument("-o", "--outfile", help="Write results to this file.")

# optional: logging verbosity switches
parser.add_argument("-v", "--verbose", action="store_true",
                    help="Be verbose.")
parser.add_argument("-d", "--debug", action="store_true",
                    help="Print lots of debugging information")

# optional: print the tool version and exit
parser.add_argument("--version", action="version", version="0.1.0")
| 59 | |
| 60 | |
def translate_nt_to_RY(seq):
    """Translate a nucleotide string to the binary RY alphabet.

    Upper-case A and G become R; upper-case C, U and T become Y. Every other
    character (including lower-case nucleotides) is passed through unchanged.

    Args:
        seq: nucleotide sequence as a plain string.

    Returns:
        The translated string; it has the same length as ``seq``.

    >>> translate_nt_to_RY("ACGUTACGUT")
    'RYRYYRYRYY'
    """
    try:
        # Python 3: maketrans is a static method of str
        make_table = str.maketrans
    except AttributeError:
        # Python 2: maketrans only exists in the string module
        from string import maketrans as make_table
    trans_seq = seq.translate(make_table("AGCUT", "RRYYY"))
    # lazy %-style arguments: the message is only built if DEBUG is enabled
    logging.debug("%s -> %s", seq, trans_seq)
    return trans_seq
| 71 | |
| 72 | |
def translate_nt_to_RY_iterator(robj):
    """Translate the sequences of SeqRecords to the RY alphabet.

    Args:
        robj: iterable of records exposing a ``seq`` attribute (in this
            script, the iterator returned by ``SeqIO.parse``).

    Yields:
        The same record objects, modified in place so that ``seq`` holds the
        RY-translated sequence.
    """
    for record in robj:
        # R and Y are IUPAC ambiguity codes, so the result is tagged with the
        # ambiguous DNA alphabet instead of the unambiguous one
        record.seq = Seq(translate_nt_to_RY(str(record.seq)),
                         IUPAC.ambiguous_dna)
        yield record
| 79 | |
# handle arguments
args = parser.parse_args()

# configure logging: --debug takes precedence over --verbose; without either
# flag the logging module's default level applies
if args.debug:
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
elif args.verbose:
    logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s")
else:
    logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s")

# report the parsed arguments (the outfile path is logged once, not twice)
logging.info("Parsed arguments:")
if args.outfile:
    logging.info(" outfile: enabled writing to file")
    logging.info(" outfile: '{}'".format(args.outfile))
logging.info("")
| 94 | |
# read the fasta input, translate every record lazily and write the result;
# the with-block guarantees that the input file is closed again
with open(args.infile, "rU") as input_handle:
    input_seq_iterator = SeqIO.parse(input_handle, "fasta")
    convert_seq_iterator = translate_nt_to_RY_iterator(input_seq_iterator)
    # the output is opened only after the input was opened successfully, so a
    # missing input file does not create or truncate the output file
    output_handle = (open(args.outfile, "w") if args.outfile is not None else stdout)
    try:
        SeqIO.write(convert_seq_iterator, output_handle, "fasta")
    finally:
        if output_handle is stdout:
            # stdout is not owned by this script: flush it, do not close it
            stdout.flush()
        else:
            output_handle.close()
