changeset 8:17ef0e0dae68 draft

Uploaded
author rnateam
date Tue, 10 Nov 2015 09:13:44 -0500
parents bb59215dfd8f
children b994884d5541
files convert_bc_to_binary_RY.py convert_bc_to_binary_RY.xml merge_pcr_duplicates.py merge_pcr_duplicates.xml
diffstat 4 files changed, 32 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/convert_bc_to_binary_RY.py	Tue Nov 10 08:12:25 2015 -0500
+++ b/convert_bc_to_binary_RY.py	Tue Nov 10 09:13:44 2015 -0500
@@ -39,12 +39,17 @@
 # positional arguments
 parser.add_argument(
     "infile",
-    help="Path to fasta input file.")
+    help="Path to fastq input file.")
 # optional arguments
 parser.add_argument(
     "-o", "--outfile",
     help="Write results to this file.")
 parser.add_argument(
+    "--fasta-format",
+    dest="fasta_format",
+    help="Read and write fasta instead of fastq format.",
+    action="store_true")
+parser.add_argument(
     "-v", "--verbose",
     help="Be verbose.",
     action="store_true")
@@ -73,8 +78,13 @@
 def translate_nt_to_RY_iterator(robj):
     """Translate SeqRecords sequences to RY alphabet."""
     for record in robj:
+        if not args.fasta_format:
+            saved_letter_annotations = record.letter_annotations
+        record.letter_annotations = {}
         record.seq = Seq(translate_nt_to_RY(str(record.seq)),
                          IUPAC.unambiguous_dna)
+        if not args.fasta_format:
+            record.letter_annotations = saved_letter_annotations
         yield record
 
 # handle arguments
@@ -94,8 +104,14 @@
 
 # get input iterator
 input_handle = open(args.infile, "rU")
-input_seq_iterator = SeqIO.parse(input_handle, "fasta")
+if args.fasta_format:
+    input_seq_iterator = SeqIO.parse(input_handle, "fasta")
+else:
+    input_seq_iterator = SeqIO.parse(input_handle, "fastq")
 convert_seq_iterator = translate_nt_to_RY_iterator(input_seq_iterator)
 output_handle = (open(args.outfile, "w") if args.outfile is not None else stdout)
-SeqIO.write(convert_seq_iterator, output_handle, "fasta")
+if args.fasta_format:
+    SeqIO.write(convert_seq_iterator, output_handle, "fasta")
+else:
+    SeqIO.write(convert_seq_iterator, output_handle, "fastq")
 output_handle.close()
--- a/convert_bc_to_binary_RY.xml	Tue Nov 10 08:12:25 2015 -0500
+++ b/convert_bc_to_binary_RY.xml	Tue Nov 10 09:13:44 2015 -0500
@@ -33,11 +33,11 @@
 
 **Input**
 
-The input for this tool is a fasta file.
+The input for this tool is a fastq file.
 
 **Output**
 
-This tool produces a single fasta file containing the converted barcodes.
+This tool produces a single fastq file containing the converted barcodes.
 
 ** References **
 ]]></help>
--- a/merge_pcr_duplicates.py	Tue Nov 10 08:12:25 2015 -0500
+++ b/merge_pcr_duplicates.py	Tue Nov 10 09:13:44 2015 -0500
@@ -8,7 +8,7 @@
 
 Input:
 * bed6 file containing alignments with fastq read-id in name field
-* fasta library with fastq read-id as sequence ids
+* fastq library of random barcodes
 
 Output:
 * bed6 file with random barcode in name field and number of PCR duplicates as
@@ -61,6 +61,11 @@
 parser.add_argument(
     "-o", "--outfile",
     help="Write results to this file.")
+parser.add_argument(
+    "--fasta-library",
+    dest="fasta_library",
+    action="store_true",
+    help="Read random barcode library as fasta format.")
 # misc arguments
 parser.add_argument(
     "-v", "--verbose",
@@ -93,7 +98,10 @@
 
 # load barcode library into dictionary
 input_handle = open(args.bclib, "rU")
-input_seq_iterator = SeqIO.parse(input_handle, "fasta")
+if args.fasta_library:
+    input_seq_iterator = SeqIO.parse(input_handle, "fasta")
+else:
+    input_seq_iterator = SeqIO.parse(input_handle, "fastq")
 bcs = pd.DataFrame.from_records(
     data=fasta_tuple_generator(input_seq_iterator),
     columns=["read_id", "bc"])
--- a/merge_pcr_duplicates.xml	Tue Nov 10 08:12:25 2015 -0500
+++ b/merge_pcr_duplicates.xml	Tue Nov 10 09:13:44 2015 -0500
@@ -38,7 +38,7 @@
 Input::
 
 * bed6 file containing alignments with fastq read-id in name field
-* fasta library with fastq read-id as sequence ids
+* fastq library of random barcodes
 
 Output::