Mercurial > repos > rnateam > bctools
comparison merge_pcr_duplicates.py @ 8:17ef0e0dae68 draft
Uploaded
| author | rnateam |
|---|---|
| date | Tue, 10 Nov 2015 09:13:44 -0500 |
| parents | de4ea3aa1090 |
| children | 570a7de9f151 |
comparison
equal
deleted
inserted
replaced
| 7:bb59215dfd8f | 8:17ef0e0dae68 |
|---|---|
| 6 Barcodes containing uncalled base 'N' are removed. By default output is written | 6 Barcodes containing uncalled base 'N' are removed. By default output is written |
| 7 to stdout. | 7 to stdout. |
| 8 | 8 |
| 9 Input: | 9 Input: |
| 10 * bed6 file containing alignments with fastq read-id in name field | 10 * bed6 file containing alignments with fastq read-id in name field |
| 11 * fasta library with fastq read-id as sequence ids | 11 * fastq library of random barcodes |
| 12 | 12 |
| 13 Output: | 13 Output: |
| 14 * bed6 file with random barcode in name field and number of PCR duplicates as | 14 * bed6 file with random barcode in name field and number of PCR duplicates as |
| 15 score, sorted by fields chrom, start, stop, strand, name | 15 score, sorted by fields chrom, start, stop, strand, name |
| 16 | 16 |
| 59 help="Path to fasta barcode library.") | 59 help="Path to fasta barcode library.") |
| 60 # optional arguments | 60 # optional arguments |
| 61 parser.add_argument( | 61 parser.add_argument( |
| 62 "-o", "--outfile", | 62 "-o", "--outfile", |
| 63 help="Write results to this file.") | 63 help="Write results to this file.") |
| | 64 parser.add_argument( |
| | 65 "--fasta-library", |
| | 66 dest="fasta_library", |
| | 67 action="store_true", |
| | 68 help="Read random barcode library as fasta format.") |
| 64 # misc arguments | 69 # misc arguments |
| 65 parser.add_argument( | 70 parser.add_argument( |
| 66 "-v", "--verbose", | 71 "-v", "--verbose", |
| 67 help="Be verbose.", | 72 help="Be verbose.", |
| 68 action="store_true") | 73 action="store_true") |
| 91 logging.info(" outfile: '{}'".format(args.outfile)) | 96 logging.info(" outfile: '{}'".format(args.outfile)) |
| 92 logging.info("") | 97 logging.info("") |
| 93 | 98 |
| 94 # load barcode library into dictionary | 99 # load barcode library into dictionary |
| 95 input_handle = open(args.bclib, "rU") | 100 input_handle = open(args.bclib, "rU") |
| 96 input_seq_iterator = SeqIO.parse(input_handle, "fasta") | 101 if args.fasta_library: |
| | 102 input_seq_iterator = SeqIO.parse(input_handle, "fasta") |
| | 103 else: |
| | 104 input_seq_iterator = SeqIO.parse(input_handle, "fastq") |
| 97 bcs = pd.DataFrame.from_records( | 105 bcs = pd.DataFrame.from_records( |
| 98 data=fasta_tuple_generator(input_seq_iterator), | 106 data=fasta_tuple_generator(input_seq_iterator), |
| 99 columns=["read_id", "bc"]) | 107 columns=["read_id", "bc"]) |
| 100 | 108 |
| 101 # load alignments | 109 # load alignments |
