Mercurial > repos > artbio > bamparse
comparison bamparse.py @ 0:b3107800eea7 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
| author | artbio |
|---|---|
| date | Mon, 17 Jul 2023 01:02:01 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b3107800eea7 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 from collections import defaultdict | |
| 4 | |
| 5 import pysam | |
| 6 | |
| 7 | |
def Parser():
    """
    Parse the command line and return the resulting argparse namespace.

    Recognised options:
      --output      one or more strings (help text: count tables)
      --alignments  one or more BAM alignment file paths
      --labels      one or more labels for the alignments
      --number      "unique" or "multiple": single table or several tables

    No option is required; any option left out is None in the namespace.
    """
    # Options are declared as data and registered in order so the generated
    # help lists them in the same sequence.
    option_specs = (
        ('--output', dict(nargs='+', action='store', type=str,
                          help='Count tables')),
        ('--alignments', dict(nargs='+',
                              help="bam alignments files")),
        ('--labels', dict(nargs='+', help="Alignments labels")),
        ('--number', dict(choices=["unique", "multiple"],
                          help="output is a single table or multiple tables")),
    )
    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
| 20 | |
| 21 | |
def get_counts(bamfile):
    """
    Return the alignment count for every reference of an open alignment file.

    bamfile must provide a `references` iterable and a
    `count(reference=name)` method; main() passes a pysam AlignmentFile.
    As noted by the original authors, whether the counts cover sense,
    antisense, or both kinds of alignments depends on the pre-treatment
    performed by sambamba in the xml wrapper, not on this function.

    The result is a defaultdict(int) with one entry per reference name, in
    the order given by `bamfile.references`; looking up an unknown name
    yields 0.
    """
    counts = defaultdict(int)
    # Single pass: every reference gets its entry straight from count(), so
    # no separate zero-initialisation loop is needed.
    for ref_name in bamfile.references:
        counts[ref_name] = bamfile.count(reference=ref_name)
    return counts
| 34 | |
| 35 | |
def writetable(diclist, labels, output, number):
    """
    Write count tables as tab-separated files under "outputdir/".

    diclist -- list of count dictionaries (gene/reference name -> count),
               one per alignment file.
    labels  -- column labels, one per dictionary, in the same order.
    output  -- accepted for interface compatibility; not used here, the
               files are always written to fixed paths under "outputdir/".
    number  -- "unique" writes a single table "outputdir/table.tabular"
               with one column per label; any other value writes one table
               per dictionary, "outputdir/table<i>.tabular".

    The "outputdir" directory must already exist; it is not created here.

    In "unique" mode each row is looked up by gene name, so every count sits
    under the right gene even when the dictionaries do not share the same
    keys; a gene missing from a dictionary is reported as 0. An empty
    diclist produces a table holding only the header line.
    """
    if number == "unique":
        # Union of all gene names so that no dictionary's genes are dropped.
        genes = sorted(set().union(*diclist))
        with open("outputdir/table.tabular", "w") as out:
            out.write("gene\t%s\n" % "\t".join(labels))
            for gene in genes:
                # .get() avoids inserting keys into defaultdict inputs.
                row = [str(gene)] + [str(dic.get(gene, 0)) for dic in diclist]
                out.write("%s\n" % "\t".join(row))
    else:
        for i, (dic, label) in enumerate(zip(diclist, labels)):
            # `with` guarantees the file is closed even if a write fails.
            with open("outputdir/table" + str(i) + ".tabular", "w") as out:
                out.write("gene\t%s\n" % label)
                for gene in sorted(dic):
                    out.write("%s\t%s\n" % (gene, dic[gene]))
| 57 | |
| 58 | |
def main(alignments, labels, output, number):
    """
    Count alignments per reference in each BAM file and write the tables.

    alignments -- iterable of BAM file paths, each opened with pysam in
                  'rb' mode.
    labels, output, number -- passed unchanged to writetable().
    """
    diclist = []
    for bam_path in alignments:
        # The context manager closes each BAM handle as soon as its counts
        # are collected, instead of leaving every file open until exit.
        with pysam.AlignmentFile(bam_path, 'rb') as bam_object:
            diclist.append(get_counts(bam_object))
    writetable(diclist, labels, output, number)
| 65 | |
| 66 | |
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the counting.
    cli_args = Parser()
    main(alignments=cli_args.alignments,
         labels=cli_args.labels,
         output=cli_args.output,
         number=cli_args.number)
