Mercurial > repos > dfornika > pick_plasmids_containing_genes
annotate pick_plasmids_containing_genes.py @ 0:a938371b3bfd draft
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
| author | dfornika |
|---|---|
| date | Sat, 02 Nov 2019 00:33:07 -0400 |
| parents | |
| children | 2dd1a0ed7cce |
| rev | line source |
|---|---|
|
0
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
2 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
3 from __future__ import print_function |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
4 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
5 import argparse |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
6 import errno |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
7 import csv |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
8 import os |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
9 import re |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
10 import shutil |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
11 import sys |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
12 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
13 from pprint import pprint |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
14 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
def parse_screen_file(screen_file):
    """Read the gene screening table into a list of row dicts.

    The file is parsed as tab-separated text with a header line; each
    subsequent line becomes one dict keyed by the header's column names.

    Args:
        screen_file: Path to the tab-separated screening file.

    Returns:
        A list with one dict per data row, in file order. The list is empty
        when the file has no data rows.
    """
    with open(screen_file) as f:
        # DictReader is lazy, so materialize the rows before the file closes.
        return list(csv.DictReader(f, delimiter="\t", quotechar='"'))
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
22 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
def get_fieldnames(input_file):
    """Return the column names from the header line of a tab-separated file.

    Args:
        input_file: Path to the tab-separated file.

    Returns:
        The list of header column names, or None when the file is empty.
    """
    with open(input_file) as handle:
        # Accessing .fieldnames makes the reader consume the header line.
        header = csv.DictReader(handle, delimiter="\t", quotechar='"').fieldnames
    return header
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
28 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
def _read_report_rows(report_path):
    """Return every data row of a tab-separated abricate report as dicts."""
    with open(report_path, 'r') as f:
        return list(csv.DictReader(f, delimiter="\t", quotechar='"'))


def _read_fasta_headers(fasta_path):
    """Return the set of right-stripped lines that start with '>' in a file."""
    with open(fasta_path, 'r') as f:
        return set(line.rstrip() for line in f if line.startswith('>'))


def main(args):
    """Copy plasmid FASTA files that contain a screened gene into args.outdir.

    Steps:
      1. Create the output directory (an existing directory is accepted).
      2. Load the screening table and compile its 'regex' column.
      3. For each abricate report, search every row's 'GENE' value with every
         pattern, record the 'SEQUENCE' of matching rows, and print a
         "<report>\\t<True|False>" line under a "file\\tgene_detected" header.
      4. Copy each plasmid file that has a header line equal to
         '>' + <matching SEQUENCE> into the output directory and print
         "<plasmid>\\tTrue" for it.

    Args:
        args: Namespace with the attributes ``outdir``,
            ``abricate_report_screening_file``, ``abricate_reports`` and
            ``plasmids`` (the latter may be None or empty).

    Raises:
        OSError: If the output directory cannot be created, or an input file
            cannot be read or copied.
        KeyError: If the screening table lacks a 'regex' column, or a report
            lacks a 'GENE' or 'SEQUENCE' column.
    """
    # create output directory (makedirs also handles nested paths)
    try:
        os.makedirs(args.outdir)
    except OSError as exc:
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.outdir)):
            raise

    # parse screening file; compile each pattern once instead of per row
    screen = parse_screen_file(args.abricate_report_screening_file)
    patterns = [re.compile(gene['regex']) for gene in screen]

    # parse all abricate reports and determine which ones contain genes of interest
    contigs_with_genes_of_interest = []
    seen_contigs = set()
    print("\t".join(["file", "gene_detected"]))
    for abricate_report in args.abricate_reports:
        # Read the rows once so they can be scanned for every pattern without
        # rewinding the file (rewinding failed on a completely empty report).
        report_rows = _read_report_rows(abricate_report)
        gene_of_interest_detected = False
        for pattern in patterns:
            for report_row in report_rows:
                if pattern.search(report_row['GENE']):
                    gene_of_interest_detected = True
                    contig = report_row['SEQUENCE']
                    # Keep first-seen order but drop repeats so a plasmid is
                    # not scanned, reported and copied more than once.
                    if contig not in seen_contigs:
                        seen_contigs.add(contig)
                        contigs_with_genes_of_interest.append(contig)
        print("\t".join([abricate_report, str(gene_of_interest_detected)]))

    # Nothing matched: do not touch the plasmid files at all.
    if not contigs_with_genes_of_interest:
        return

    # copy the corresponding plasmid fasta files into outdir
    # Each plasmid file is read once; its header lines are kept for lookup.
    headers_by_plasmid = [
        (plasmid, _read_fasta_headers(plasmid))
        for plasmid in (args.plasmids or [])
    ]
    copied = set()
    for contig in contigs_with_genes_of_interest:
        wanted_header = '>' + contig
        for plasmid, headers in headers_by_plasmid:
            if wanted_header in headers and plasmid not in copied:
                copied.add(plasmid)
                print("\t".join([plasmid, "True"]))
                shutil.copy2(plasmid, args.outdir)
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
68 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: collect the input paths and hand them to main().
    parser = argparse.ArgumentParser()
    # Defaults to an empty list so main() can iterate it when no plasmids are given.
    parser.add_argument("--plasmids", nargs='+', default=[], help="plasmid assemblies (fasta)")
    # The next two options are required: main() cannot run without them.
    parser.add_argument("--abricate_reports", nargs='+', required=True, help="abricate reports (tsv)")
    parser.add_argument("--abricate_report_screening_file", required=True,
                        help="tab-separated file with a 'regex' column of gene name patterns to screen for")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)
