diff multicov.py @ 0:3c0451ca266e draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
author artbio
date Tue, 07 Jan 2020 11:08:31 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multicov.py	Tue Jan 07 11:08:31 2020 +0000
@@ -0,0 +1,60 @@
+import argparse
+
+import numpy
+
+import pysam
+
+
+def Parser():
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument('-bams', '--bams', dest='bams', required=True,
+                            nargs='+', help='list of input BAM files')
+    the_parser.add_argument('-bed', '--bed', dest='bed', required=True,
+                            help='Coordinates of probes in a bed file')
+    args = the_parser.parse_args()
+    return args
+
+
+def compute_coverage(bam, bed, quality=10):
+    bam_object = pysam.AlignmentFile(bam, 'rb')
+    bed_object = open(bed, 'r')
+    coverage_column = []
+    for line in bed_object:
+        if line[0] == '#':
+            continue
+        fields = line[:-1].split('\t')
+        chr = fields[0]
+        start = fields[1]
+        end = fields[2]
+        coverage = bam_object.count_coverage(contig=chr,
+                                             start=int(start)-1,
+                                             stop=int(end),
+                                             quality_threshold=quality)
+        """ Add the 4 coverage values """
+        coverage = [sum(x) for x in zip(*coverage)]
+        coverage_column.append(numpy.mean(coverage))
+    bed_object.close()
+    return coverage_column
+
+
+def main(bams, bed):
+    column_dict = {}
+    for i, bam in enumerate(bams):
+        column_dict[i] = compute_coverage(bam, bed)
+    F = open(bed, 'r')
+    line_counter = 0
+    for line in F:
+        if line[0] == '#':
+            continue
+        prefix = line[:-1]
+        crossline = []
+        for col in sorted(column_dict):
+            crossline.append(str(column_dict[col][line_counter]))
+        line_counter += 1
+        suffix = '\t'.join(crossline)
+        print('%s\t%s' % (prefix, suffix))
+
+
+if __name__ == "__main__":
+    args = Parser()
+    main(args.bams, args.bed)