annotate pick_plasmids_containing_genes.py @ 0:a938371b3bfd draft

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
author dfornika
date Sat, 02 Nov 2019 00:33:07 -0400
parents
children 2dd1a0ed7cce
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
1 #!/usr/bin/env python
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
2
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
3 from __future__ import print_function
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
4
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
5 import argparse
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
6 import errno
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
7 import csv
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
8 import os
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
9 import re
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
10 import shutil
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
11 import sys
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
12
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
13 from pprint import pprint
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
14
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
def parse_screen_file(screen_file):
    """Load a tab-separated screening table as a list of row dicts.

    The first line of ``screen_file`` supplies the column names. Every
    following line becomes one mapping from column name to field value;
    fields may be wrapped in double quotes.

    Args:
        screen_file: Path of the tab-separated file to read.

    Returns:
        A list with one dict-like row per data line, in file order (empty
        when the file holds only a header or nothing at all).
    """
    with open(screen_file) as handle:
        return list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
22
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
def get_fieldnames(input_file):
    """Return the column names from the header of a tab-separated file.

    Args:
        input_file: Path of the tab-separated file whose first line is the
            header.

    Returns:
        The list of header column names, or ``None`` when the file is empty
        (``csv.DictReader.fieldnames`` yields ``None`` in that case).
    """
    with open(input_file) as handle:
        header_reader = csv.DictReader(handle, delimiter="\t", quotechar='"')
        return header_reader.fieldnames
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
28
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
def main(args):
    """Report screened-gene hits per abricate report and copy matching plasmids.

    Steps:
      1. Create ``args.outdir``; an already-existing directory is accepted.
      2. Load the screening table and search the ``GENE`` column of every
         abricate report with each screening ``regex``.
      3. Print a tab-separated ``file`` / ``gene_detected`` table to stdout,
         one line per report.
      4. Copy every plasmid FASTA file that contains a sequence named like a
         matching ``SEQUENCE`` value into ``args.outdir`` and print the
         plasmid path followed by a tab and ``True`` for each file copied.

    A FASTA header matches a ``SEQUENCE`` value when either the whole header
    (without the leading ``>``) or its first whitespace-delimited token (the
    sequence ID) equals that value, so headers carrying a description after
    the ID are still found. Each plasmid is reported and copied at most once,
    even when several hits point at it.

    Args:
        args: Parsed command-line namespace providing ``outdir``,
            ``abricate_report_screening_file``, ``abricate_reports`` and
            ``plasmids`` (``plasmids`` may be ``None``).

    Raises:
        OSError: If ``args.outdir`` cannot be created and is not already a
            directory, or if an input file cannot be read or copied.
        KeyError: If the screening table lacks a ``regex`` column, or a
            report with rows lacks ``GENE``/``SEQUENCE`` columns.
        re.error: If a screening ``regex`` is not a valid pattern.
    """
    # create output directory
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        # An existing directory is fine; anything else is a real failure.
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.outdir)):
            raise

    # parse screening file; compile each pattern once instead of per row
    screen = parse_screen_file(args.abricate_report_screening_file)
    patterns = [re.compile(gene['regex']) for gene in screen]

    # Unique SEQUENCE values of hits, kept in first-seen order.
    contigs_with_genes_of_interest = []
    seen_contigs = set()

    # parse all abricate reports and determine which ones contain genes of interest
    print("\t".join(["file", "gene_detected"]))
    for abricate_report in args.abricate_reports:
        gene_of_interest_detected = False
        with open(abricate_report, 'r') as f:
            # Read the rows once; rewinding the file under a live DictReader
            # fails with StopIteration on an empty report.
            report_rows = list(csv.DictReader(f, delimiter="\t", quotechar='"'))
        for pattern in patterns:
            for abricate_report_row in report_rows:
                if pattern.search(abricate_report_row['GENE']):
                    gene_of_interest_detected = True
                    contig = abricate_report_row['SEQUENCE']
                    if contig not in seen_contigs:
                        seen_contigs.add(contig)
                        contigs_with_genes_of_interest.append(contig)
        print("\t".join([abricate_report, str(gene_of_interest_detected)]))

    if not contigs_with_genes_of_interest:
        return

    # Index the sequence names of every plasmid once, rather than re-reading
    # each FASTA file for every contig.
    names_by_plasmid = []
    for plasmid in (args.plasmids or []):
        names = set()
        with open(plasmid, 'r') as f:
            for line in f:
                if not line.startswith('>'):
                    continue
                header = line.rstrip()[1:]
                names.add(header)
                tokens = header.split(None, 1)
                if tokens:
                    names.add(tokens[0])
        names_by_plasmid.append((plasmid, names))

    # copy the corresponding plasmid fasta files into outdir
    copied = set()
    for contig in contigs_with_genes_of_interest:
        for plasmid, names in names_by_plasmid:
            if plasmid in copied or contig not in names:
                continue
            print("\t".join([plasmid, "True"]))
            shutil.copy2(plasmid, args.outdir)
            copied.add(plasmid)
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
68
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: collect the input paths and hand them to main().
    parser = argparse.ArgumentParser(
        description="Copy plasmid assemblies whose abricate reports contain "
                    "genes matching a screening table.")
    parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
    # Required: main() cannot run without reports or a screening table, so
    # fail with a usage message rather than a TypeError deep inside main().
    parser.add_argument("--abricate_reports", nargs='+', required=True,
                        help="abricate reports (tsv)")
    parser.add_argument("--abricate_report_screening_file", required=True,
                        help="screening table (tsv) with a 'regex' column that is "
                             "matched against the GENE column of each abricate report")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)