Mercurial > repos > iuc > concoct
annotate coverage_table.py @ 2:bd409bbd287f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 77dea70f156d56c83b6851b61dc997d2b344bdc9
author | iuc |
---|---|
date | Fri, 01 Jul 2022 14:10:59 +0000 |
parents | 1f4286d836a3 |
children |
rev | line source |
---|---|
1
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
2 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
3 import argparse |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
4 import gzip |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
5 from functools import partial |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
6 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
7 from Bio import SeqIO |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
8 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
9 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
10 def generate_coverage_table(input_fasta, input_tabular, gzipped, output): |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
11 # Read input file into a dict and return everything |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
12 # in the table format required by CONCOCT. |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
13 gc_and_len_dict = get_gc_and_len_dict(input_fasta, gzipped) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
14 assert(len(gc_and_len_dict) > 0) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
15 bed_coverage_dict = get_bed_coverage_dict(input_tabular) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
16 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
17 with open(output, 'w') as fh: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
18 # Output the header. |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
19 fh.write("contig\tlength") |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
20 t = tuple(range(len(bed_coverage_dict))) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
21 fh.write("\tcov_mean_sample_%d\n" % len(t)) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
22 # Output the content. |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
23 for acc in gc_and_len_dict: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
24 # Fasta stats. |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
25 fh.write("%s\t%s" % (acc, gc_and_len_dict[acc]['length'])) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
26 # Mean |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
27 try: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
28 # Coverage mean |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
29 fh.write("\t%f" % (bed_coverage_dict[acc]["cov_mean"])) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
30 except KeyError: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
31 # No reads mapped to this contig |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
32 fh.write("\t0") |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
33 fh.write("\n") |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
34 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
35 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
36 def get_bed_coverage_dict(input_tabular): |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
37 # Ddetermine mean coverage and percentage covered |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
38 # for each contig, returning a dict with fasta id |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
39 # as key and percentage covered and cov_mean as keys |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
40 # for the inner dict. |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
41 out_dict = {} |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
42 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
43 with open(input_tabular, 'r') as fh: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
44 for line in fh: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
45 line = line.rstrip('\r\n') |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
46 cols = line.split('\t') |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
47 try: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
48 d = out_dict[cols[0]] |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
49 except KeyError: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
50 d = {} |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
51 out_dict[cols[0]] = d |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
52 if int(cols[1]) == 0: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
53 d["percentage_covered"] = 100 - float(cols[4]) * 100.0 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
54 else: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
55 d["cov_mean"] = d.get("cov_mean", 0) + int(cols[1]) * float(cols[4]) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
56 return out_dict |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
57 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
58 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
59 def get_gc_and_len_dict(input_fasta, gzipped): |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
60 # Creates a dictionary with the fasta id as key |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
61 # and GC and length as keys for the inner dictionary. |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
62 if gzipped: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
63 _open = partial(gzip.open, mode='rt') |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
64 else: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
65 _open = open |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
66 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
67 out_dict = {} |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
68 with _open(input_fasta) as input_fh: |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
69 for rec in SeqIO.parse(input_fh, "fasta"): |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
70 out_dict[rec.id] = {} |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
71 out_dict[rec.id]["length"] = len(rec.seq) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
72 return out_dict |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
73 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
74 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
75 parser = argparse.ArgumentParser(description=__doc__) |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
76 parser.add_argument('--input_tabular', action='store', dest='input_tabular', help='bedtools genomeCoverageBed bed file') |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
77 parser.add_argument('--input_fasta', action='store', dest='input_fasta', help='Contigs fasta file') |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
78 parser.add_argument("--gzipped", action="store_true", dest="gzipped", default=False, help="input_fasta is gzipped") |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
79 parser.add_argument('--output', action='store', dest='output', help='Output file') |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
80 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
81 args = parser.parse_args() |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
82 |
1f4286d836a3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 40a09cbfd6052f7b0295946621db1bdf58228b09"
iuc
parents:
diff
changeset
|
83 generate_coverage_table(args.input_fasta, args.input_tabular, args.gzipped, args.output) |