Mercurial > repos > rnateam > bctools
comparison extract_bcs.py @ 7:bb59215dfd8f draft
Uploaded
| author | rnateam |
|---|---|
| date | Tue, 10 Nov 2015 08:12:25 -0500 |
| parents | e841de88235c |
| children | 0b9aab6aaebf |
Comparison (row legend: equal, deleted, inserted, replaced)
| 6:1bfc5a5de843 | 7:bb59215dfd8f |
|---|---|
| 6 By default output is written to stdout. | 6 By default output is written to stdout. |
| 7 | 7 |
| 8 Example usage: | 8 Example usage: |
| 9 - remove barcode nucleotides at positions 1-3 and 6-7 from FASTQ; write modified | 9 - remove barcode nucleotides at positions 1-3 and 6-7 from FASTQ; write modified |
| 10 FASTQ entries to output.fastq and barcode nucleotides to barcodes.fa: | 10 FASTQ entries to output.fastq and barcode nucleotides to barcodes.fa: |
| 11 fastq_extract_barcodes.py barcoded_input.fastq XXXNNXX --out output.fastq --bcs barcodes.fa | 11 fastq_extract_barcodes.py barcoded_input.fastq XXXNNXX --out output.fastq --bcs barcodes.fastq |
| 12 """ | 12 """ |
| 13 | 13 |
| 14 epilog = """ | 14 epilog = """ |
| 15 Author: Daniel Maticzka | 15 Author: Daniel Maticzka |
| 16 Copyright: 2015 | 16 Copyright: 2015 |
| 46 "-o", "--outfile", | 46 "-o", "--outfile", |
| 47 help="Write results to this file.") | 47 help="Write results to this file.") |
| 48 parser.add_argument( | 48 parser.add_argument( |
| 49 "-b", "--bcs", | 49 "-b", "--bcs", |
| 50 dest="out_bc_fasta", | 50 dest="out_bc_fasta", |
| 51 help="If set, barcodes are written to this file in FASTA format.") | 51 help="Write barcodes to this file in FASTQ format.") |
| 52 parser.add_argument( | |
| 53 "--fasta-barcodes", | |
| 54 dest="save_bcs_as_fa", | |
| 55 action="store_true", | |
| 56 help="Save extracted barcodes in FASTA format.") | |
| 52 parser.add_argument( | 57 parser.add_argument( |
| 53 "-a", "--add-bc-to-fastq", | 58 "-a", "--add-bc-to-fastq", |
| 54 dest="add_to_head", | 59 dest="add_to_head", |
| 55 help="If set, append extracted barcodes to the FASTQ headers.", | 60 help="Append extracted barcodes to the FASTQ headers.", |
| 56 action="store_true" | 61 action="store_true") |
| 57 ) | |
| 58 parser.add_argument( | 62 parser.add_argument( |
| 59 "-v", "--verbose", | 63 "-v", "--verbose", |
| 60 help="Be verbose.", | 64 help="Be verbose.", |
| 61 action="store_true") | 65 action="store_true") |
| 62 parser.add_argument( | 66 parser.add_argument( |
| 64 help="Print lots of debugging information", | 68 help="Print lots of debugging information", |
| 65 action="store_true") | 69 action="store_true") |
| 66 parser.add_argument( | 70 parser.add_argument( |
| 67 '--version', | 71 '--version', |
| 68 action='version', | 72 action='version', |
| 69 version='1.0.0') | 73 version='0.1.0') |
| 70 | 74 |
| 71 args = parser.parse_args() | 75 args = parser.parse_args() |
| 72 if args.debug: | 76 if args.debug: |
| 73 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s") | 77 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s") |
| 74 elif args.verbose: | 78 elif args.verbose: |
| 80 logging.info(" pattern: '{}'".format(args.pattern)) | 84 logging.info(" pattern: '{}'".format(args.pattern)) |
| 81 if args.outfile: | 85 if args.outfile: |
| 82 logging.info(" outfile: enabled writing to file") | 86 logging.info(" outfile: enabled writing to file") |
| 83 logging.info(" outfile: '{}'".format(args.outfile)) | 87 logging.info(" outfile: '{}'".format(args.outfile)) |
| 84 if args.out_bc_fasta: | 88 if args.out_bc_fasta: |
| 85 logging.info(" bcs: enabled writing barcodes to fasta file") | 89 logging.info(" bcs: enabled writing barcodes to fastq file") |
| 86 logging.info(" bcs: {}".format(args.out_bc_fasta)) | 90 logging.info(" bcs: {}".format(args.out_bc_fasta)) |
| 91 if args.save_bcs_as_fa: | |
| 92 logging.info(" fasta-barcodes: write barcodes in fasta format instead of fastq") | |
| 87 logging.info("") | 93 logging.info("") |
| 88 | 94 |
| 89 # check if supplied pattern is valid | 95 # check if supplied pattern is valid |
| 90 valid_pattern = re.compile("^[XN]+$") | 96 valid_pattern = re.compile("^[XN]+$") |
| 91 pattern_match = valid_pattern.match(args.pattern) | 97 pattern_match = valid_pattern.match(args.pattern) |
| 134 logging.debug("len(seq): {}".format(len(seq))) | 140 logging.debug("len(seq): {}".format(len(seq))) |
| 135 continue | 141 continue |
| 136 | 142 |
| 137 # extract barcode nucleotides | 143 # extract barcode nucleotides |
| 138 barcode_list = [] | 144 barcode_list = [] |
| 145 barcode_qual_list = [] | |
| 139 for bcstart, bcstop in barcode_positions: | 146 for bcstart, bcstop in barcode_positions: |
| 140 barcode_list.append(seq[bcstart:bcstop]) | 147 barcode_list.append(seq[bcstart:bcstop]) |
| 148 barcode_qual_list.append(qual[bcstart:bcstop]) | |
| 141 barcode = "".join(barcode_list) | 149 barcode = "".join(barcode_list) |
| 150 barcode_quals = "".join(barcode_qual_list) | |
| 142 logging.debug("extracted barcode: {}".format(barcode)) | 151 logging.debug("extracted barcode: {}".format(barcode)) |
| 143 | 152 |
| 144 # create new sequence and quality string without barcode nucleotides | 153 # create new sequence and quality string without barcode nucleotides |
| 145 new_seq_list = [] | 154 new_seq_list = [] |
| 146 new_qual_list = [] | 155 new_qual_list = [] |
| 165 annotated_header = header | 174 annotated_header = header |
| 166 samout.write("@%s\n%s\n+\n%s\n" % (annotated_header, new_seq, new_qual)) | 175 samout.write("@%s\n%s\n+\n%s\n" % (annotated_header, new_seq, new_qual)) |
| 167 | 176 |
| 168 # write barcode to fasta if requested | 177 # write barcode to fasta if requested |
| 169 if args.out_bc_fasta is not None: | 178 if args.out_bc_fasta is not None: |
| 170 faout.write(">{}\n{}\n".format(header, barcode)) | 179 if args.save_bcs_as_fa: |
| 180 faout.write(">{}\n{}\n".format(header, barcode)) | |
| 181 else: | |
| 182 faout.write("@{}\n{}\n+\n{}\n".format(header, barcode, barcode_quals)) | |
| 171 | 183 |
| 172 # close files | 184 # close files |
| 173 samout.close() | 185 samout.close() |
| 174 if args.out_bc_fasta is not None: | 186 if args.out_bc_fasta is not None: |
| 175 faout.close() | 187 faout.close() |
