Mercurial > repos > dfornika > pick_plasmids_containing_genes
diff pick_plasmids_containing_genes.py @ 0:a938371b3bfd draft
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
| author | dfornika |
|---|---|
| date | Sat, 02 Nov 2019 00:33:07 -0400 |
| parents | |
| children | 2dd1a0ed7cce |
line wrap: on
line diff
#!/usr/bin/env python
"""Pick the plasmid FASTA files that contain genes of interest.

The script reads a tab-separated screening file whose ``regex`` column holds
one regular expression per gene of interest, searches the ``GENE`` column of
each abricate report for those expressions, and copies every plasmid FASTA
file that contains a matching contig (the report's ``SEQUENCE`` column) into
the output directory.  A tab-separated summary is printed to stdout.
"""

from __future__ import print_function

import argparse
import errno
import csv
import os
import re
import shutil
import sys

from pprint import pprint


def parse_screen_file(screen_file):
    """Return the rows of the tab-separated screening file as a list of dicts.

    Each row is keyed by the column names found on the file's header line.
    """
    with open(screen_file) as f:
        return list(csv.DictReader(f, delimiter="\t", quotechar='"'))


def get_fieldnames(input_file):
    """Return the column names from the header line of a tab-separated file."""
    with open(input_file) as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        fieldnames = reader.fieldnames
    return fieldnames


def _contigs_matching_screen(abricate_report, patterns):
    """Return the SEQUENCE value of every report row whose GENE matches.

    The result is ordered pattern by pattern, then row by row, and may
    contain the same contig more than once.  An empty report yields [].
    """
    # Read the rows once instead of rewinding the file for every pattern.
    with open(abricate_report, 'r') as f:
        rows = list(csv.DictReader(f, delimiter="\t", quotechar='"'))
    return [
        row['SEQUENCE']
        for pattern in patterns
        for row in rows
        if pattern.search(row['GENE'])
    ]


def _fasta_record_names(fasta_path):
    """Return the set of names by which records in a FASTA file can be matched.

    Every header line contributes its full text (without the leading '>' and
    trailing whitespace) and its first whitespace-delimited token.
    """
    names = set()
    with open(fasta_path, 'r') as f:
        for line in f:
            if not line.startswith('>'):
                continue
            header = line.rstrip()[1:]
            names.add(header)
            # By FASTA convention the first token is the sequence ID and the
            # rest is a free-text description.
            # NOTE(review): assumes the report's SEQUENCE column carries only
            # that ID -- confirm against the abricate version in use.
            tokens = header.split(None, 1)
            if tokens:
                names.add(tokens[0])
    return names


def main(args):
    """Screen the abricate reports and copy the matching plasmids.

    ``args`` must provide ``outdir``, ``abricate_report_screening_file``,
    ``abricate_reports`` and ``plasmids``.  Prints a ``file``/``gene_detected``
    table with one row per report, followed by one ``<plasmid>\\tTrue`` row for
    each plasmid copied into ``outdir``.  Each plasmid is reported and copied
    at most once.

    Raises OSError if the output directory cannot be created, and KeyError if
    the screening file lacks a ``regex`` column or a report lacks the ``GENE``
    or ``SEQUENCE`` column.
    """
    # create output directory (an already existing directory is fine)
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.outdir)):
            raise

    # argparse leaves these as None when the option is omitted.
    abricate_reports = args.abricate_reports or []
    plasmids = args.plasmids or []

    # parse screening file; compile each expression once, not once per row
    screen = parse_screen_file(args.abricate_report_screening_file)
    patterns = [re.compile(gene['regex']) for gene in screen]

    # parse all abricate reports and determine which ones contain genes of interest
    contigs_with_genes_of_interest = []
    seen_contigs = set()
    print("\t".join(["file", "gene_detected"]))
    for abricate_report in abricate_reports:
        matched_contigs = _contigs_matching_screen(abricate_report, patterns)
        for contig in matched_contigs:
            # A contig may match several genes or appear in several reports;
            # keep only its first occurrence so nothing is copied twice.
            if contig not in seen_contigs:
                seen_contigs.add(contig)
                contigs_with_genes_of_interest.append(contig)
        print("\t".join([abricate_report, str(bool(matched_contigs))]))

    if not contigs_with_genes_of_interest:
        return

    # Scan each plasmid file once rather than once per contig.
    names_by_plasmid = [
        (plasmid, _fasta_record_names(plasmid)) for plasmid in plasmids
    ]

    # copy the corresponding plasmid fasta files into outdir
    copied = set()
    for contig in contigs_with_genes_of_interest:
        for plasmid, record_names in names_by_plasmid:
            if plasmid in copied or contig not in record_names:
                continue
            copied.add(plasmid)
            print("\t".join([plasmid, "True"]))
            shutil.copy2(plasmid, args.outdir)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
    parser.add_argument("--abricate_reports", nargs='+', help="abricate reports (tsv)")
    parser.add_argument("--abricate_report_screening_file", help="")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)
