diff convert_bc_to_binary_RY.py @ 2:de4ea3aa1090 draft

Uploaded
author rnateam
date Thu, 22 Oct 2015 10:26:45 -0400
parents
children 17ef0e0dae68
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_bc_to_binary_RY.py	Thu Oct 22 10:26:45 2015 -0400
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+tool_description = """
+Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes.
+
+A and G are converted to nucleotide code R. T, U and C are converted to Y. By
+default output is written to stdout.
+
+Example usage:
+- write converted sequences from file in.fa to file file out.fa:
+convert_bc_to_binary_RY.py in.fa --outfile out.fa
+"""
+
+epilog = """
+Author: Daniel Maticzka
+Copyright: 2015
+License: Apache
+Email: maticzkd@informatik.uni-freiburg.de
+Status: Testing
+"""
+
+import argparse
+import logging
+from string import maketrans
+from sys import stdout
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.Alphabet import IUPAC
+
+# # avoid ugly python IOError when stdout output is piped into another program
+# # and then truncated (such as piping to head)
+# from signal import signal, SIGPIPE, SIG_DFL
+# signal(SIGPIPE, SIG_DFL)
+
+# parse command line arguments
+parser = argparse.ArgumentParser(description=tool_description,
+                                 epilog=epilog,
+                                 formatter_class=argparse.RawDescriptionHelpFormatter)
+# positional arguments
+parser.add_argument(
+    "infile",
+    help="Path to fasta input file.")
+# optional arguments
+parser.add_argument(
+    "-o", "--outfile",
+    help="Write results to this file.")
+parser.add_argument(
+    "-v", "--verbose",
+    help="Be verbose.",
+    action="store_true")
+parser.add_argument(
+    "-d", "--debug",
+    help="Print lots of debugging information",
+    action="store_true")
+parser.add_argument(
+    '--version',
+    action='version',
+    version='0.1.0')
+
+
+def translate_nt_to_RY(seq):
+    """Translates nucleotides to RY (A,G -> R; C,U,T -> Y).
+
+    >>> translate_nt_to_RY("ACGUTACGUT")
+    RYRYYRYRYY
+    """
+    trans_table = maketrans("AGCUT", "RRYYY")
+    trans_seq = seq.translate(trans_table)
+    logging.debug(seq + " -> " + trans_seq)
+    return trans_seq
+
+
+def translate_nt_to_RY_iterator(robj):
+    """Translate SeqRecords sequences to RY alphabet."""
+    for record in robj:
+        record.seq = Seq(translate_nt_to_RY(str(record.seq)),
+                         IUPAC.unambiguous_dna)
+        yield record
+
+# handle arguments
+args = parser.parse_args()
+if args.debug:
+    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
+elif args.verbose:
+    logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s")
+else:
+    logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s")
+logging.info("Parsed arguments:")
+if args.outfile:
+    logging.info("  outfile: enabled writing to file")
+    logging.info("  outfile: '{}'".format(args.outfile))
+logging.info("  outfile: '{}'".format(args.outfile))
+logging.info("")
+
+# get input iterator
+input_handle = open(args.infile, "rU")
+input_seq_iterator = SeqIO.parse(input_handle, "fasta")
+convert_seq_iterator = translate_nt_to_RY_iterator(input_seq_iterator)
+output_handle = (open(args.outfile, "w") if args.outfile is not None else stdout)
+SeqIO.write(convert_seq_iterator, output_handle, "fasta")
+output_handle.close()