Mercurial > repos > dfornika > pick_plasmids_containing_genes
annotate pick_plasmids_containing_genes.py @ 4:109b9d1e2e99 draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit 0f3fff91eb329adf437224eb8f7449853083b01e-dirty"
| author | dfornika |
|---|---|
| date | Thu, 19 Dec 2019 21:06:14 +0000 |
| parents | 2dd1a0ed7cce |
| children |
| rev | line source |
|---|---|
|
0
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
2 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
3 from __future__ import print_function |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
4 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
5 import argparse |
|
4
109b9d1e2e99
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit 0f3fff91eb329adf437224eb8f7449853083b01e-dirty"
dfornika
parents:
2
diff
changeset
|
6 import csv |
|
0
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
7 import errno |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
8 import os |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
9 import re |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
10 import shutil |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
11 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
12 |
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
def parse_screen_file(screen_file):
    """Load the gene-screening table as a list of row mappings.

    The file is read as tab-delimited text with double-quote quoting; its
    first line supplies the column names used as keys for every row.

    :param screen_file: path to the tab-separated screening file
    :return: list with one mapping (column name -> value) per data row,
        in file order; empty when the file has no data rows
    """
    with open(screen_file) as handle:
        rows = list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
    return rows
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
20 |
|
4
109b9d1e2e99
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit 0f3fff91eb329adf437224eb8f7449853083b01e-dirty"
dfornika
parents:
2
diff
changeset
|
21 |
|
0
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
def _read_abricate_rows(report_path):
    """Return the data rows of a (possibly concatenated) abricate report.

    Concatenating several reports repeats the header line in the middle of
    the file; those repeated headers are recognised by their '#FILE' field
    holding the literal text '#FILE' and are dropped.
    """
    with open(report_path, 'r') as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        return [row for row in reader if row['#FILE'] != '#FILE']


def _fasta_header_lines(fasta_path):
    """Return the set of right-stripped '>' header lines found in a file."""
    with open(fasta_path, 'r') as f:
        return {line.rstrip() for line in f if line.startswith('>')}


def main(args):
    """Copy every plasmid that carries a screened gene into the output dir.

    Steps:
      1. Create ``args.outdir`` (an already existing directory is accepted).
      2. Read the screening table; each row's 'regex' column is searched
         against the 'GENE' column of the abricate report rows.
      3. Collect the 'SEQUENCE' value of every matching report row.
      4. Copy each plasmid file containing a header line equal to
         '>' + <sequence name> into ``args.outdir`` and print one
         "<plasmid>\\tTrue" line for it on stdout.

    Each plasmid is reported and copied at most once, even when several
    screened genes or several of its contigs match.

    :param args: namespace providing ``outdir``, ``plasmids``,
        ``abricate_report_screening_file`` and
        ``concatenated_abricate_reports``
    :raises OSError: if the output directory cannot be created, or an
        input file cannot be read or copied
    """
    # create output directory
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        # Only "already exists as a directory" is acceptable.
        if exc.errno != errno.EEXIST or not os.path.isdir(args.outdir):
            raise

    # parse screening file
    screen = parse_screen_file(args.abricate_report_screening_file)

    # parse all abricate reports and determine which ones contain genes of interest
    print("\t".join(["file", "gene_detected"]))

    # Read the report once instead of rewinding the file for every gene;
    # this also avoids failing on a completely empty report.
    report_rows = _read_abricate_rows(args.concatenated_abricate_reports)

    # Ordered, de-duplicated list of contigs (first match wins the position).
    contigs_with_genes_of_interest = []
    seen_contigs = set()
    for gene in screen:
        pattern = re.compile(gene['regex'])
        for abricate_report_row in report_rows:
            gene_name = abricate_report_row['GENE']
            if gene_name is None:
                # Truncated row with no GENE field: nothing to match.
                continue
            if pattern.search(gene_name):
                contig = abricate_report_row['SEQUENCE']
                if contig not in seen_contigs:
                    seen_contigs.add(contig)
                    contigs_with_genes_of_interest.append(contig)

    # copy the corresponding plasmid fasta files into outdir
    plasmids = args.plasmids or []
    header_cache = {}  # plasmid path -> set of its header lines, read lazily
    copied = set()
    for contig in contigs_with_genes_of_interest:
        wanted_header = '>' + contig
        for plasmid in plasmids:
            if plasmid in copied:
                continue
            if plasmid not in header_cache:
                header_cache[plasmid] = _fasta_header_lines(plasmid)
            if wanted_header in header_cache[plasmid]:
                print("\t".join([plasmid, "True"]))
                shutil.copy2(plasmid, args.outdir)
                copied.add(plasmid)
|
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
60 |
|
4
109b9d1e2e99
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit 0f3fff91eb329adf437224eb8f7449853083b01e-dirty"
dfornika
parents:
2
diff
changeset
|
61 |
|
0
a938371b3bfd
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit bf17791d0ee6756ebbd306614617f52034b8741c-dirty"
dfornika
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(
        description="Copy plasmid assemblies that contain screened genes "
                    "into an output directory."
    )
    # default=[] keeps the option optional while staying iterable.
    parser.add_argument("--plasmids", nargs='+', default=[],
                        help="plasmid assemblies (fasta)")
    # Both input tables are opened unconditionally, so require them up front
    # rather than failing later with an error from open(None).
    parser.add_argument("--concatenated_abricate_reports", required=True,
                        help="abricate reports (tsv)")
    parser.add_argument("--abricate_report_screening_file", required=True,
                        help="tab-separated file of genes to screen for; "
                             "must contain a 'regex' column")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)
