Mercurial > repos > artbio > probecoverage
diff multicov.py @ 0:3c0451ca266e draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
| author | artbio |
|---|---|
| date | Tue, 07 Jan 2020 11:08:31 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multicov.py Tue Jan 07 11:08:31 2020 +0000 @@ -0,0 +1,60 @@ +import argparse + +import numpy + +import pysam + + +def Parser(): + the_parser = argparse.ArgumentParser() + the_parser.add_argument('-bams', '--bams', dest='bams', required=True, + nargs='+', help='list of input BAM files') + the_parser.add_argument('-bed', '--bed', dest='bed', required=True, + help='Coordinates of probes in a bed file') + args = the_parser.parse_args() + return args + + +def compute_coverage(bam, bed, quality=10): + bam_object = pysam.AlignmentFile(bam, 'rb') + bed_object = open(bed, 'r') + coverage_column = [] + for line in bed_object: + if line[0] == '#': + continue + fields = line[:-1].split('\t') + chr = fields[0] + start = fields[1] + end = fields[2] + coverage = bam_object.count_coverage(contig=chr, + start=int(start)-1, + stop=int(end), + quality_threshold=quality) + """ Add the 4 coverage values """ + coverage = [sum(x) for x in zip(*coverage)] + coverage_column.append(numpy.mean(coverage)) + bed_object.close() + return coverage_column + + +def main(bams, bed): + column_dict = {} + for i, bam in enumerate(bams): + column_dict[i] = compute_coverage(bam, bed) + F = open(bed, 'r') + line_counter = 0 + for line in F: + if line[0] == '#': + continue + prefix = line[:-1] + crossline = [] + for col in sorted(column_dict): + crossline.append(str(column_dict[col][line_counter])) + line_counter += 1 + suffix = '\t'.join(crossline) + print('%s\t%s' % (prefix, suffix)) + + +if __name__ == "__main__": + args = Parser() + main(args.bams, args.bed)
