Mercurial > repos > dfornika > pick_plasmids_containing_genes
annotate pick_plasmids_containing_genes.py @ 2:2dd1a0ed7cce draft
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit 9cb01222a76ffdf89fa8d7b69f32df7c2b1f860a-dirty"
| author | dfornika |
|---|---|
| date | Sat, 02 Nov 2019 01:32:22 -0400 |
| parents | a938371b3bfd |
| children | 109b9d1e2e99 |
| rev | line source |
|---|---|
|
0
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
2 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
3 from __future__ import print_function |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
4 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
5 import argparse |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
6 import errno |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
7 import csv |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
8 import os |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
9 import re |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
10 import shutil |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
11 import sys |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
12 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
13 from pprint import pprint |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
14 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
def parse_screen_file(screen_file):
    """Read the tab-separated screening file into a list of row dicts.

    The first line of the file supplies the column names; ``main`` reads the
    ``regex`` column of every returned row.

    Args:
        screen_file: Path to the screening file (TSV with a header line).

    Returns:
        A list with one dict per data row, keyed by column name.
    """
    with open(screen_file) as f:
        return list(csv.DictReader(f, delimiter="\t", quotechar='"'))


def _read_abricate_rows(report_path):
    """Return the data rows of a (possibly concatenated) abricate report."""
    with open(report_path, 'r') as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        # Concatenated reports repeat the header line; those repeats show up
        # as rows whose '#FILE' value is the literal column name.
        return [row for row in reader if row['#FILE'] != '#FILE']


def _fasta_sequence_names(fasta_path):
    """Return the set of names the sequences in a fasta file can be matched by.

    For every header line both the full header text (without the leading
    '>') and its first whitespace-delimited token are recorded, so a header
    that carries a description still matches a report listing only the ID.
    """
    names = set()
    with open(fasta_path, 'r') as f:
        for line in f:
            if not line.startswith('>'):
                continue
            header = line.rstrip()[1:]
            names.add(header)
            tokens = header.split(None, 1)
            if tokens:
                names.add(tokens[0])
    return names


def main(args):
    """Copy every plasmid fasta that contains a screened gene into outdir.

    A tab-separated report with the columns ``file`` and ``gene_detected``
    is printed to stdout: the header line first, then one line per copied
    plasmid.

    Args:
        args: Namespace providing
            ``outdir`` - directory to create (if needed) and copy into,
            ``abricate_report_screening_file`` - TSV with a ``regex`` column,
            ``concatenated_abricate_reports`` - TSV with ``#FILE``, ``GENE``
            and ``SEQUENCE`` columns,
            ``plasmids`` - list of fasta paths (``None`` is treated as empty).

    Raises:
        OSError: if the output directory cannot be created, or an input
            file cannot be opened or copied.
        KeyError: if an expected column is missing from an input file.
    """
    # create output directory (an already existing directory is fine)
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.outdir)):
            raise

    # parse screening file
    screen = parse_screen_file(args.abricate_report_screening_file)

    print("\t".join(["file", "gene_detected"]))

    # Read the report once instead of rewinding the file for every gene;
    # this also copes with an empty report.
    report_rows = _read_abricate_rows(args.concatenated_abricate_reports)
    patterns = [re.compile(gene['regex']) for gene in screen]

    # Collect matching contigs in first-hit order (gene by gene, then row by
    # row), keeping each contig only once.
    contigs_with_genes_of_interest = []
    seen_contigs = set()
    for pattern in patterns:
        for row in report_rows:
            if not pattern.search(row['GENE']):
                continue
            contig = row['SEQUENCE']
            if contig not in seen_contigs:
                seen_contigs.add(contig)
                contigs_with_genes_of_interest.append(contig)

    if not contigs_with_genes_of_interest:
        return

    # Scan every plasmid fasta a single time, and only when there is
    # something to look for.
    indexed_plasmids = [
        (plasmid, _fasta_sequence_names(plasmid))
        for plasmid in (args.plasmids or [])
    ]

    # copy the corresponding plasmid fasta files into outdir, reporting and
    # copying each file at most once
    copied = set()
    for contig in contigs_with_genes_of_interest:
        for plasmid, names in indexed_plasmids:
            if plasmid in copied or contig not in names:
                continue
            copied.add(plasmid)
            print("\t".join([plasmid, "True"]))
            shutil.copy2(plasmid, args.outdir)
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
61 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    # An omitted --plasmids yields an empty list rather than None, so main()
    # can always iterate over it.
    parser.add_argument("--plasmids", nargs='+', default=[], help="plasmid assemblies (fasta)")
    # Both input files are opened unconditionally by main(); marking them
    # required turns a missing option into a usage error instead of a
    # TypeError from open(None).
    parser.add_argument("--concatenated_abricate_reports", required=True, help="abricate reports (tsv)")
    parser.add_argument(
        "--abricate_report_screening_file",
        required=True,
        help="screening file (tsv with a header line and a 'regex' column; "
             "each regex is searched in the GENE column of the abricate reports)",
    )
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)
