annotate multicov.py @ 0:3c0451ca266e draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
author artbio
date Tue, 07 Jan 2020 11:08:31 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
1 import argparse
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
2
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
3 import numpy
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
4
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
5 import pysam
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
6
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
7
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
8 def Parser():
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
9 the_parser = argparse.ArgumentParser()
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
10 the_parser.add_argument('-bams', '--bams', dest='bams', required=True,
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
11 nargs='+', help='list of input BAM files')
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
12 the_parser.add_argument('-bed', '--bed', dest='bed', required=True,
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
13 help='Coordinates of probes in a bed file')
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
14 args = the_parser.parse_args()
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
15 return args
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
16
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
17
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
18 def compute_coverage(bam, bed, quality=10):
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
19 bam_object = pysam.AlignmentFile(bam, 'rb')
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
20 bed_object = open(bed, 'r')
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
21 coverage_column = []
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
22 for line in bed_object:
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
23 if line[0] == '#':
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
24 continue
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
25 fields = line[:-1].split('\t')
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
26 chr = fields[0]
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
27 start = fields[1]
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
28 end = fields[2]
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
29 coverage = bam_object.count_coverage(contig=chr,
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
30 start=int(start)-1,
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
31 stop=int(end),
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
32 quality_threshold=quality)
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
33 """ Add the 4 coverage values """
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
34 coverage = [sum(x) for x in zip(*coverage)]
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
35 coverage_column.append(numpy.mean(coverage))
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
36 bed_object.close()
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
37 return coverage_column
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
38
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
39
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
40 def main(bams, bed):
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
41 column_dict = {}
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
42 for i, bam in enumerate(bams):
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
43 column_dict[i] = compute_coverage(bam, bed)
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
44 F = open(bed, 'r')
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
45 line_counter = 0
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
46 for line in F:
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
47 if line[0] == '#':
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
48 continue
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
49 prefix = line[:-1]
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
50 crossline = []
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
51 for col in sorted(column_dict):
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
52 crossline.append(str(column_dict[col][line_counter]))
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
53 line_counter += 1
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
54 suffix = '\t'.join(crossline)
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
55 print('%s\t%s' % (prefix, suffix))
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
56
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
57
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
58 if __name__ == "__main__":
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
59 args = Parser()
3c0451ca266e "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
diff changeset
60 main(args.bams, args.bed)