| 
2
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 tool_description = """
 | 
| 
 | 
     4 Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes.
 | 
| 
 | 
     5 
 | 
| 
 | 
     6 A and G are converted to nucleotide code R. T, U and C are converted to Y. By
 | 
| 
 | 
     7 default output is written to stdout.
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 Example usage:
 | 
| 
 | 
    10 - write converted sequences from file in.fa to file file out.fa:
 | 
| 
 | 
    11 convert_bc_to_binary_RY.py in.fa --outfile out.fa
 | 
| 
 | 
    12 """
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 epilog = """
 | 
| 
 | 
    15 Author: Daniel Maticzka
 | 
| 
 | 
    16 Copyright: 2015
 | 
| 
 | 
    17 License: Apache
 | 
| 
 | 
    18 Email: maticzkd@informatik.uni-freiburg.de
 | 
| 
 | 
    19 Status: Testing
 | 
| 
 | 
    20 """
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 import argparse
 | 
| 
 | 
    23 import logging
 | 
| 
 | 
    24 from string import maketrans
 | 
| 
 | 
    25 from sys import stdout
 | 
| 
 | 
    26 from Bio import SeqIO
 | 
| 
 | 
    27 from Bio.Seq import Seq
 | 
| 
 | 
    28 from Bio.Alphabet import IUPAC
 | 
| 
 | 
    29 
 | 
| 
 | 
    30 # # avoid ugly python IOError when stdout output is piped into another program
 | 
| 
 | 
    31 # # and then truncated (such as piping to head)
 | 
| 
 | 
    32 # from signal import signal, SIGPIPE, SIG_DFL
 | 
| 
 | 
    33 # signal(SIGPIPE, SIG_DFL)
 | 
| 
 | 
    34 
 | 
| 
 | 
    35 # parse command line arguments
 | 
| 
 | 
    36 parser = argparse.ArgumentParser(description=tool_description,
 | 
| 
 | 
    37                                  epilog=epilog,
 | 
| 
 | 
    38                                  formatter_class=argparse.RawDescriptionHelpFormatter)
 | 
| 
 | 
    39 # positional arguments
 | 
| 
 | 
    40 parser.add_argument(
 | 
| 
 | 
    41     "infile",
 | 
| 
 | 
    42     help="Path to fasta input file.")
 | 
| 
 | 
    43 # optional arguments
 | 
| 
 | 
    44 parser.add_argument(
 | 
| 
 | 
    45     "-o", "--outfile",
 | 
| 
 | 
    46     help="Write results to this file.")
 | 
| 
 | 
    47 parser.add_argument(
 | 
| 
 | 
    48     "-v", "--verbose",
 | 
| 
 | 
    49     help="Be verbose.",
 | 
| 
 | 
    50     action="store_true")
 | 
| 
 | 
    51 parser.add_argument(
 | 
| 
 | 
    52     "-d", "--debug",
 | 
| 
 | 
    53     help="Print lots of debugging information",
 | 
| 
 | 
    54     action="store_true")
 | 
| 
 | 
    55 parser.add_argument(
 | 
| 
 | 
    56     '--version',
 | 
| 
 | 
    57     action='version',
 | 
| 
 | 
    58     version='0.1.0')
 | 
| 
 | 
    59 
 | 
| 
 | 
    60 
 | 
| 
 | 
    61 def translate_nt_to_RY(seq):
 | 
| 
 | 
    62     """Translates nucleotides to RY (A,G -> R; C,U,T -> Y).
 | 
| 
 | 
    63 
 | 
| 
 | 
    64     >>> translate_nt_to_RY("ACGUTACGUT")
 | 
| 
 | 
    65     RYRYYRYRYY
 | 
| 
 | 
    66     """
 | 
| 
 | 
    67     trans_table = maketrans("AGCUT", "RRYYY")
 | 
| 
 | 
    68     trans_seq = seq.translate(trans_table)
 | 
| 
 | 
    69     logging.debug(seq + " -> " + trans_seq)
 | 
| 
 | 
    70     return trans_seq
 | 
| 
 | 
    71 
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 def translate_nt_to_RY_iterator(robj):
 | 
| 
 | 
    74     """Translate SeqRecords sequences to RY alphabet."""
 | 
| 
 | 
    75     for record in robj:
 | 
| 
 | 
    76         record.seq = Seq(translate_nt_to_RY(str(record.seq)),
 | 
| 
 | 
    77                          IUPAC.unambiguous_dna)
 | 
| 
 | 
    78         yield record
 | 
| 
 | 
    79 
 | 
| 
 | 
    80 # handle arguments
 | 
| 
 | 
    81 args = parser.parse_args()
 | 
| 
 | 
    82 if args.debug:
 | 
| 
 | 
    83     logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
 | 
| 
 | 
    84 elif args.verbose:
 | 
| 
 | 
    85     logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s")
 | 
| 
 | 
    86 else:
 | 
| 
 | 
    87     logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s")
 | 
| 
 | 
    88 logging.info("Parsed arguments:")
 | 
| 
 | 
    89 if args.outfile:
 | 
| 
 | 
    90     logging.info("  outfile: enabled writing to file")
 | 
| 
 | 
    91     logging.info("  outfile: '{}'".format(args.outfile))
 | 
| 
 | 
    92 logging.info("  outfile: '{}'".format(args.outfile))
 | 
| 
 | 
    93 logging.info("")
 | 
| 
 | 
    94 
 | 
| 
 | 
    95 # get input iterator
 | 
| 
 | 
    96 input_handle = open(args.infile, "rU")
 | 
| 
 | 
    97 input_seq_iterator = SeqIO.parse(input_handle, "fasta")
 | 
| 
 | 
    98 convert_seq_iterator = translate_nt_to_RY_iterator(input_seq_iterator)
 | 
| 
 | 
    99 output_handle = (open(args.outfile, "w") if args.outfile is not None else stdout)
 | 
| 
 | 
   100 SeqIO.write(convert_seq_iterator, output_handle, "fasta")
 | 
| 
 | 
   101 output_handle.close()
 |