comparison merge_pcr_duplicates.py @ 8:17ef0e0dae68 draft

Uploaded
author rnateam
date Tue, 10 Nov 2015 09:13:44 -0500
parents de4ea3aa1090
children 570a7de9f151
comparison
equal deleted inserted replaced
7:bb59215dfd8f 8:17ef0e0dae68
6 Barcodes containing uncalled base 'N' are removed. By default output is written 6 Barcodes containing uncalled base 'N' are removed. By default output is written
7 to stdout. 7 to stdout.
8 8
9 Input: 9 Input:
10 * bed6 file containing alignments with fastq read-id in name field 10 * bed6 file containing alignments with fastq read-id in name field
11 * fasta library with fastq read-id as sequence ids 11 * fastq library of random barcodes
12 12
13 Output: 13 Output:
14 * bed6 file with random barcode in name field and number of PCR duplicates as 14 * bed6 file with random barcode in name field and number of PCR duplicates as
15 score, sorted by fields chrom, start, stop, strand, name 15 score, sorted by fields chrom, start, stop, strand, name
16 16
59 help="Path to fasta barcode library.") 59 help="Path to fasta barcode library.")
60 # optional arguments 60 # optional arguments
61 parser.add_argument( 61 parser.add_argument(
62 "-o", "--outfile", 62 "-o", "--outfile",
63 help="Write results to this file.") 63 help="Write results to this file.")
64 parser.add_argument(
65 "--fasta-library",
66 dest="fasta_library",
67 action="store_true",
68 help="Read random barcode library as fasta format.")
64 # misc arguments 69 # misc arguments
65 parser.add_argument( 70 parser.add_argument(
66 "-v", "--verbose", 71 "-v", "--verbose",
67 help="Be verbose.", 72 help="Be verbose.",
68 action="store_true") 73 action="store_true")
91 logging.info(" outfile: '{}'".format(args.outfile)) 96 logging.info(" outfile: '{}'".format(args.outfile))
92 logging.info("") 97 logging.info("")
93 98
94 # load barcode library into dictionary 99 # load barcode library into dictionary
95 input_handle = open(args.bclib, "rU") 100 input_handle = open(args.bclib, "rU")
96 input_seq_iterator = SeqIO.parse(input_handle, "fasta") 101 if args.fasta_library:
102 input_seq_iterator = SeqIO.parse(input_handle, "fasta")
103 else:
104 input_seq_iterator = SeqIO.parse(input_handle, "fastq")
97 bcs = pd.DataFrame.from_records( 105 bcs = pd.DataFrame.from_records(
98 data=fasta_tuple_generator(input_seq_iterator), 106 data=fasta_tuple_generator(input_seq_iterator),
99 columns=["read_id", "bc"]) 107 columns=["read_id", "bc"])
100 108
101 # load alignments 109 # load alignments