Mercurial > repos > rnateam > bctools
comparison merge_pcr_duplicates.py @ 8:17ef0e0dae68 draft
Uploaded
author | rnateam |
---|---|
date | Tue, 10 Nov 2015 09:13:44 -0500 |
parents | de4ea3aa1090 |
children | 570a7de9f151 |
Comparison legend:
equal
deleted
inserted
replaced
7:bb59215dfd8f | 8:17ef0e0dae68 |
---|---|
6 Barcodes containing uncalled base 'N' are removed. By default output is written | 6 Barcodes containing uncalled base 'N' are removed. By default output is written |
7 to stdout. | 7 to stdout. |
8 | 8 |
9 Input: | 9 Input: |
10 * bed6 file containing alignments with fastq read-id in name field | 10 * bed6 file containing alignments with fastq read-id in name field |
11 * fasta library with fastq read-id as sequence ids | 11 * fastq library of random barcodes |
12 | 12 |
13 Output: | 13 Output: |
14 * bed6 file with random barcode in name field and number of PCR duplicates as | 14 * bed6 file with random barcode in name field and number of PCR duplicates as |
15 score, sorted by fields chrom, start, stop, strand, name | 15 score, sorted by fields chrom, start, stop, strand, name |
16 | 16 |
59 help="Path to fasta barcode library.") | 59 help="Path to fasta barcode library.") |
60 # optional arguments | 60 # optional arguments |
61 parser.add_argument( | 61 parser.add_argument( |
62 "-o", "--outfile", | 62 "-o", "--outfile", |
63 help="Write results to this file.") | 63 help="Write results to this file.") |
64 parser.add_argument( | |
65 "--fasta-library", | |
66 dest="fasta_library", | |
67 action="store_true", | |
68 help="Read random barcode library as fasta format.") | |
64 # misc arguments | 69 # misc arguments |
65 parser.add_argument( | 70 parser.add_argument( |
66 "-v", "--verbose", | 71 "-v", "--verbose", |
67 help="Be verbose.", | 72 help="Be verbose.", |
68 action="store_true") | 73 action="store_true") |
91 logging.info(" outfile: '{}'".format(args.outfile)) | 96 logging.info(" outfile: '{}'".format(args.outfile)) |
92 logging.info("") | 97 logging.info("") |
93 | 98 |
94 # load barcode library into dictionary | 99 # load barcode library into dictionary |
95 input_handle = open(args.bclib, "rU") | 100 input_handle = open(args.bclib, "rU") |
96 input_seq_iterator = SeqIO.parse(input_handle, "fasta") | 101 if args.fasta_library: |
102 input_seq_iterator = SeqIO.parse(input_handle, "fasta") | |
103 else: | |
104 input_seq_iterator = SeqIO.parse(input_handle, "fastq") | |
97 bcs = pd.DataFrame.from_records( | 105 bcs = pd.DataFrame.from_records( |
98 data=fasta_tuple_generator(input_seq_iterator), | 106 data=fasta_tuple_generator(input_seq_iterator), |
99 columns=["read_id", "bc"]) | 107 columns=["read_id", "bc"]) |
100 | 108 |
101 # load alignments | 109 # load alignments |