Mercurial > repos > artbio > probecoverage
annotate multicov.py @ 0:3c0451ca266e draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
| author | artbio |
|---|---|
| date | Tue, 07 Jan 2020 11:08:31 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
1 import argparse |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
2 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
3 import numpy |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
4 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
5 import pysam |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
6 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
7 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
8 def Parser(): |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
9 the_parser = argparse.ArgumentParser() |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
10 the_parser.add_argument('-bams', '--bams', dest='bams', required=True, |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
11 nargs='+', help='list of input BAM files') |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
12 the_parser.add_argument('-bed', '--bed', dest='bed', required=True, |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
13 help='Coordinates of probes in a bed file') |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
14 args = the_parser.parse_args() |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
15 return args |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
16 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
17 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
18 def compute_coverage(bam, bed, quality=10): |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
19 bam_object = pysam.AlignmentFile(bam, 'rb') |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
20 bed_object = open(bed, 'r') |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
21 coverage_column = [] |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
22 for line in bed_object: |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
23 if line[0] == '#': |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
24 continue |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
25 fields = line[:-1].split('\t') |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
26 chr = fields[0] |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
27 start = fields[1] |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
28 end = fields[2] |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
29 coverage = bam_object.count_coverage(contig=chr, |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
30 start=int(start)-1, |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
31 stop=int(end), |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
32 quality_threshold=quality) |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
33 """ Add the 4 coverage values """ |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
34 coverage = [sum(x) for x in zip(*coverage)] |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
35 coverage_column.append(numpy.mean(coverage)) |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
36 bed_object.close() |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
37 return coverage_column |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
38 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
39 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
40 def main(bams, bed): |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
41 column_dict = {} |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
42 for i, bam in enumerate(bams): |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
43 column_dict[i] = compute_coverage(bam, bed) |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
44 F = open(bed, 'r') |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
45 line_counter = 0 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
46 for line in F: |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
47 if line[0] == '#': |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
48 continue |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
49 prefix = line[:-1] |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
50 crossline = [] |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
51 for col in sorted(column_dict): |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
52 crossline.append(str(column_dict[col][line_counter])) |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
53 line_counter += 1 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
54 suffix = '\t'.join(crossline) |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
55 print('%s\t%s' % (prefix, suffix)) |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
56 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
57 |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
58 if __name__ == "__main__": |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
59 args = Parser() |
|
3c0451ca266e
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff
changeset
|
60 main(args.bams, args.bed) |
