Mercurial > repos > artbio > bamparse
annotate bamparse.py @ 0:b3107800eea7 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
| author | artbio |
|---|---|
| date | Mon, 17 Jul 2023 01:02:01 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
2 import argparse |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
3 from collections import defaultdict |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
4 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
5 import pysam |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
6 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
7 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
def Parser():
    """Parse the command line and return the argparse namespace.

    Recognised options:
      --output      one or more strings ("Count tables")
      --alignments  one or more BAM alignment files
      --labels      one or more labels for the alignments
      --number      "unique" or "multiple": a single table or several tables

    None of the options is marked required, so an option that is not
    given is left at argparse's default of None.
    """
    # Each entry is (flag, keyword arguments forwarded to add_argument).
    option_specs = (
        ('--output',
         dict(nargs='+', action='store', type=str, help='Count tables')),
        ('--alignments',
         dict(nargs='+', help="bam alignments files")),
        ('--labels',
         dict(nargs='+', help="Alignments labels")),
        ('--number',
         dict(choices=["unique", "multiple"],
              help="output is a single table or multiple tables")),
    )
    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
20 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
21 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
def get_counts(bamfile):
    """
    Return the number of alignments per reference of an AlignmentFile.

    Every name in ``bamfile.references`` becomes a key of the returned
    defaultdict(int); its value is ``bamfile.count(reference=name)``.
    Whether the counts cover sense, antisense or both kinds of alignments
    depends on the pre-treatment performed by sambamba in the xml wrapper,
    not on this function.
    """
    per_reference = defaultdict(int)
    # One pass over the references: each one is counted as it is registered.
    per_reference.update(
        (name, bamfile.count(reference=name)) for name in bamfile.references
    )
    return per_reference
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
34 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
35 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
def writetable(diclist, labels, output, number):
    """Write count dictionaries as tab-separated tables under ``outputdir/``.

    Args:
        diclist: list of count dictionaries (reference name -> count),
            one per alignment file.
        labels: column labels, in the same order as ``diclist``.
        output: accepted for interface compatibility; this function does
            not use it, the output paths are fixed (see below).
        number: "unique" writes one combined table to
            ``outputdir/table.tabular``; any other value writes one table
            per dictionary to ``outputdir/table<i>.tabular``.

    The ``outputdir`` directory must already exist.

    In the combined table the rows are the sorted union of the reference
    names found in all dictionaries, and a reference missing from a given
    dictionary is reported with a count of 0. This keeps every count in the
    row of its own reference even when the inputs do not share exactly the
    same references, and it yields a header-only table when ``diclist`` is
    empty.
    """
    if number == "unique":
        # Look up each count by reference name rather than by position, so
        # that columns cannot drift out of step with the "gene" column.
        genes = sorted(set().union(*diclist))
        with open("outputdir/table.tabular", "w") as out:
            out.write("gene\t%s\n" % "\t".join(labels))
            for gene in genes:
                fields = [str(gene)]
                fields.extend(str(dic.get(gene, 0)) for dic in diclist)
                out.write("%s\n" % "\t".join(fields))
    else:
        # zip() stops at the shorter of diclist and labels.
        for i, (dic, label) in enumerate(zip(diclist, labels)):
            # The context manager closes the file even if a write fails.
            with open("outputdir/table" + str(i) + ".tabular", "w") as out:
                out.write("gene\t%s\n" % label)
                for gene in sorted(dic):
                    out.write("%s\t%s\n" % (gene, dic[gene]))
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
57 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
58 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
def main(alignments, labels, output, number):
    """Count alignments per reference in each BAM file and write the tables.

    Args:
        alignments: paths of the BAM files to open with pysam.
        labels: labels forwarded to writetable, one per BAM file.
        output: forwarded unchanged to writetable.
        number: "unique" or "multiple", forwarded to writetable.

    Each file is opened in 'rb' mode, passed to get_counts, and closed
    before the next one is opened.
    """
    diclist = []
    for bam_path in alignments:
        # The context manager releases the file handle as soon as the
        # counts have been collected, even if counting raises.
        with pysam.AlignmentFile(bam_path, 'rb') as bam_object:
            diclist.append(get_counts(bam_object))
    writetable(diclist, labels, output, number)
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
65 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
66 |
|
b3107800eea7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
artbio
parents:
diff
changeset
|
# Script entry point: parse the command line, then run the counting.
if __name__ == "__main__":
    args = Parser()
    main(
        alignments=args.alignments,
        labels=args.labels,
        output=args.output,
        number=args.number,
    )
