# HG changeset patch # User rnateam # Date 1447164824 18000 # Node ID 17ef0e0dae6853f8f3fecc22da01b36657b931c9 # Parent bb59215dfd8f8c6284b1a6a081c624f1cf37220a Uploaded diff -r bb59215dfd8f -r 17ef0e0dae68 convert_bc_to_binary_RY.py --- a/convert_bc_to_binary_RY.py Tue Nov 10 08:12:25 2015 -0500 +++ b/convert_bc_to_binary_RY.py Tue Nov 10 09:13:44 2015 -0500 @@ -39,12 +39,17 @@ # positional arguments parser.add_argument( "infile", - help="Path to fasta input file.") + help="Path to fastq input file.") # optional arguments parser.add_argument( "-o", "--outfile", help="Write results to this file.") parser.add_argument( + "--fasta-format", + dest="fasta_format", + help="Read and write fasta instead of fastq format.", + action="store_true") +parser.add_argument( "-v", "--verbose", help="Be verbose.", action="store_true") @@ -73,8 +78,13 @@ def translate_nt_to_RY_iterator(robj): """Translate SeqRecords sequences to RY alphabet.""" for record in robj: + if not args.fasta_format: + saved_letter_annotations = record.letter_annotations + record.letter_annotations = {} record.seq = Seq(translate_nt_to_RY(str(record.seq)), IUPAC.unambiguous_dna) + if not args.fasta_format: + record.letter_annotations = saved_letter_annotations yield record # handle arguments @@ -94,8 +104,14 @@ # get input iterator input_handle = open(args.infile, "rU") -input_seq_iterator = SeqIO.parse(input_handle, "fasta") +if args.fasta_format: + input_seq_iterator = SeqIO.parse(input_handle, "fasta") +else: + input_seq_iterator = SeqIO.parse(input_handle, "fastq") convert_seq_iterator = translate_nt_to_RY_iterator(input_seq_iterator) output_handle = (open(args.outfile, "w") if args.outfile is not None else stdout) -SeqIO.write(convert_seq_iterator, output_handle, "fasta") +if args.fasta_format: + SeqIO.write(convert_seq_iterator, output_handle, "fasta") +else: + SeqIO.write(convert_seq_iterator, output_handle, "fastq") output_handle.close() diff -r bb59215dfd8f -r 17ef0e0dae68 convert_bc_to_binary_RY.xml --- a/convert_bc_to_binary_RY.xml Tue Nov 10 08:12:25 2015 -0500 +++ b/convert_bc_to_binary_RY.xml Tue Nov 10 09:13:44 2015 -0500 @@ -33,11 +33,11 @@ **Input** -The input for this tool is a fasta file. +The input for this tool is a fastq file. **Output** -This tool produces a single fasta file containing the converted barcodes. +This tool produces a single fastq file containing the converted barcodes. ** References ** ]]> diff -r bb59215dfd8f -r 17ef0e0dae68 merge_pcr_duplicates.py --- a/merge_pcr_duplicates.py Tue Nov 10 08:12:25 2015 -0500 +++ b/merge_pcr_duplicates.py Tue Nov 10 09:13:44 2015 -0500 @@ -8,7 +8,7 @@ Input: * bed6 file containing alignments with fastq read-id in name field -* fasta library with fastq read-id as sequence ids +* fastq library of random barcodes Output: * bed6 file with random barcode in name field and number of PCR duplicates as @@ -61,6 +61,11 @@ parser.add_argument( "-o", "--outfile", help="Write results to this file.") +parser.add_argument( + "--fasta-library", + dest="fasta_library", + action="store_true", + help="Read random barcode library as fasta format.") # misc arguments parser.add_argument( "-v", "--verbose", @@ -93,7 +98,10 @@ # load barcode library into dictionary input_handle = open(args.bclib, "rU") -input_seq_iterator = SeqIO.parse(input_handle, "fasta") +if args.fasta_library: + input_seq_iterator = SeqIO.parse(input_handle, "fasta") +else: + input_seq_iterator = SeqIO.parse(input_handle, "fastq") bcs = pd.DataFrame.from_records( data=fasta_tuple_generator(input_seq_iterator), columns=["read_id", "bc"]) diff -r bb59215dfd8f -r 17ef0e0dae68 merge_pcr_duplicates.xml --- a/merge_pcr_duplicates.xml Tue Nov 10 08:12:25 2015 -0500 +++ b/merge_pcr_duplicates.xml Tue Nov 10 09:13:44 2015 -0500 @@ -38,7 +38,7 @@ Input:: * bed6 file containing alignments with fastq read-id in name field -* fasta library with fastq read-id as sequence ids +* fastq library of random barcodes Output::