comparison bamparse.py @ 0:b3107800eea7 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
author artbio
date Mon, 17 Jul 2023 01:02:01 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b3107800eea7
1 #!/usr/bin/env python
2 import argparse
3 from collections import defaultdict
4
5 import pysam
6
7
def Parser():
    """
    Build the command line parser and return the parsed arguments.

    Recognised options are --output, --alignments and --labels (each taking
    one or more values) and --number, restricted to "unique" or "multiple".
    """
    # Options are declared as (flag, settings) pairs and registered in order,
    # so the generated help lists them in the same sequence.
    option_specs = (
        ('--output', dict(nargs='+', action='store', type=str,
                          help='Count tables')),
        ('--alignments', dict(nargs='+', help="bam alignments files")),
        ('--labels', dict(nargs='+', help="Alignments labels")),
        ('--number', dict(choices=["unique", "multiple"],
                          help="output is a single table or multiple tables")),
    )
    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
20
21
def get_counts(bamfile):
    """
    Return the number of alignments counted for each reference of a BAM file.

    bamfile is an opened AlignmentFile-like object exposing `references` and
    `count(reference=...)`. The result is a defaultdict(int) keyed by
    reference name, in the order given by `bamfile.references`; references
    without alignments are present with whatever count() reports for them.

    Per the original note, whether the counts cover sense, antisense, or both
    sense and antisense alignments depends on the pre-treatment performed by
    sambamba in the xml wrapper, not on this function.
    """
    counts = defaultdict(int)
    # A single pass is enough: every reference is assigned its count
    # directly, so a separate zero-initialisation loop would be overwritten.
    for ref_name in bamfile.references:
        counts[ref_name] = bamfile.count(reference=ref_name)
    return counts
34
35
def writetable(diclist, labels, output, number):
    """
    Write per-reference counts as tab-separated table(s) under outputdir/.

    diclist is a list of count dictionaries (gene/reference name -> count)
    and labels holds the column header for each of them.

    If number is "unique", a single outputdir/table.tabular is written with
    one row per gene and one column per dictionary. A gene missing from a
    dictionary is reported with a count of 0. For any other value of number,
    one outputdir/table<i>.tabular is written per (dictionary, label) pair.

    The output argument is accepted but not used: file names are fixed.
    The outputdir directory must already exist.
    """
    if number == "unique":
        # Counts are looked up by gene name instead of zipping independently
        # sorted value lists: when the dictionaries do not share exactly the
        # same keys, positional zipping shifts counts onto the wrong gene
        # and silently drops trailing rows.
        genes = sorted(set().union(*diclist))
        with open("outputdir/table.tabular", "w") as out:
            out.write("gene\t%s\n" % "\t".join(labels))
            for gene in genes:
                # .get() keeps defaultdict inputs from growing new keys.
                row = [str(gene)]
                row.extend(str(dic.get(gene, 0)) for dic in diclist)
                out.write("%s\n" % "\t".join(row))
    else:
        for i, (dic, label) in enumerate(zip(diclist, labels)):
            with open("outputdir/table" + str(i) + ".tabular", "w") as out:
                out.write("gene\t%s\n" % label)
                for gene in sorted(dic):
                    out.write("%s\t%s\n" % (gene, dic[gene]))
57
58
def main(alignments, labels, output, number):
    """
    Count alignments per reference in each BAM file and write the table(s).

    alignments is an iterable of BAM file paths opened with pysam; labels,
    output and number are passed through unchanged to writetable().
    """
    diclist = []
    for bam_path in alignments:
        # The context manager closes each BAM handle as soon as it has been
        # counted, instead of keeping every file open until exit.
        with pysam.AlignmentFile(bam_path, 'rb') as bam_object:
            diclist.append(get_counts(bam_object))
    writetable(diclist, labels, output, number)
65
66
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the counting.
    cli_args = Parser()
    main(alignments=cli_args.alignments,
         labels=cli_args.labels,
         output=cli_args.output,
         number=cli_args.number)