annotate pick_plasmids_containing_genes.py @ 2:2dd1a0ed7cce draft

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit 9cb01222a76ffdf89fa8d7b69f32df7c2b1f860a-dirty"
author dfornika
date Sat, 02 Nov 2019 01:32:22 -0400
parents a938371b3bfd
children 109b9d1e2e99
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
1 #!/usr/bin/env python
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
2
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
3 from __future__ import print_function
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
4
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
5 import argparse
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
6 import errno
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
7 import csv
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
8 import os
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
9 import re
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
10 import shutil
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
11 import sys
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
12
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
13 from pprint import pprint
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
14
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
def parse_screen_file(screen_file):
    """Read the gene screening table.

    The file is parsed as tab-delimited text with a header line; double
    quotes are treated as the quote character.

    :param screen_file: path to the screening file (tsv).
    :return: list with one mapping per data row, keyed by the header's
        column names. ``main`` reads the ``regex`` column of each row.
    """
    with open(screen_file) as f:
        return list(csv.DictReader(f, delimiter="\t", quotechar='"'))
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
22
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
def _fasta_header_lines(fasta_path):
    """Return the set of '>' header lines of a FASTA file, right-stripped."""
    with open(fasta_path, 'r') as f:
        return {line.rstrip() for line in f if line.startswith('>')}


def main(args):
    """Copy plasmid FASTA files that carry a screened gene into ``args.outdir``.

    Steps:

    1. Create ``args.outdir`` (an already existing directory is accepted).
    2. Load the screening table from ``args.abricate_report_screening_file``;
       the ``regex`` column of each row is used as a ``re.search`` pattern.
    3. Scan ``args.concatenated_abricate_reports`` (tsv with ``#FILE``,
       ``GENE`` and ``SEQUENCE`` columns) and collect the ``SEQUENCE`` value
       of every row whose ``GENE`` matches a screening pattern. Rows that
       repeat the header (``#FILE`` column equal to ``#FILE``) are skipped.
    4. Copy each file of ``args.plasmids`` that contains a header line equal
       to ``'>' + SEQUENCE`` (trailing whitespace ignored) into
       ``args.outdir``.

    A tab-separated table with the columns ``file`` and ``gene_detected`` is
    printed to stdout; it gets one ``True`` line per copied plasmid.

    :param args: namespace with the attributes ``outdir``,
        ``abricate_report_screening_file``, ``concatenated_abricate_reports``
        and ``plasmids`` (list of paths, or None).
    :raises OSError: if ``args.outdir`` cannot be created and is not an
        existing directory, or if an input file cannot be read.
    """
    # create output directory
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        # Only "already exists, and is a directory" is tolerated.
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.outdir)):
            raise

    # parse screening file
    screen = parse_screen_file(args.abricate_report_screening_file)

    print("\t".join(["file", "gene_detected"]))

    # Load the report once instead of rewinding the file for every gene.
    # This also avoids the StopIteration the rewind raised on an empty file.
    with open(args.concatenated_abricate_reports, 'r') as f:
        report_rows = [
            row
            for row in csv.DictReader(f, delimiter="\t", quotechar='"')
            # Concatenated reports repeat their header line; drop the repeats.
            if row['#FILE'] != '#FILE'
        ]

    # Determine which contigs contain genes of interest. Contigs are kept in
    # first-seen order (gene by gene, then row by row) without duplicates.
    patterns = [re.compile(gene['regex']) for gene in screen]
    contigs_with_genes_of_interest = []
    seen_contigs = set()
    for pattern in patterns:
        for row in report_rows:
            contig = row['SEQUENCE']
            if pattern.search(row['GENE']) and contig not in seen_contigs:
                seen_contigs.add(contig)
                contigs_with_genes_of_interest.append(contig)

    if not contigs_with_genes_of_interest:
        # Nothing matched, so the plasmid files are never opened.
        return

    # Read every plasmid file once and remember its header lines.
    # args.plasmids is None when --plasmids was not given.
    plasmid_headers = [
        (plasmid, _fasta_header_lines(plasmid))
        for plasmid in (args.plasmids or [])
    ]

    # copy the corresponding plasmid fasta files into outdir
    copied = set()
    for contig in contigs_with_genes_of_interest:
        wanted_header = '>' + contig
        for plasmid, headers in plasmid_headers:
            if plasmid in copied or wanted_header not in headers:
                continue
            print("\t".join([plasmid, "True"]))
            shutil.copy2(plasmid, args.outdir)
            copied.add(plasmid)
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
61
a938371b3bfd "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(
        description="Copy plasmid assemblies that contain screened genes into an output directory."
    )
    parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
    # main() opens both of the following files unconditionally, so a missing
    # option is reported by argparse instead of failing later in open(None).
    parser.add_argument("--concatenated_abricate_reports", required=True, help="abricate reports (tsv)")
    parser.add_argument(
        "--abricate_report_screening_file",
        required=True,
        help="screening file (tsv) with a 'regex' column matched against the abricate GENE column",
    )
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)