diff bamparse.py @ 0:b3107800eea7 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
author artbio
date Mon, 17 Jul 2023 01:02:01 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamparse.py	Mon Jul 17 01:02:01 2023 +0000
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+import argparse
+from collections import defaultdict
+
+import pysam
+
+
+def Parser():
+    """Build the command-line interface and return the parsed arguments.
+
+    --output, --alignments and --labels each accept one or more values;
+    --number must be either "unique" or "multiple".
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        '--output', action='store', nargs='+', type=str, help='Count tables')
+    cli.add_argument(
+        '--alignments', nargs='+', help="bam alignments files")
+    cli.add_argument('--labels', nargs='+', help="Alignments labels")
+    cli.add_argument(
+        '--number', choices=["unique", "multiple"],
+        help="output is a single table or multiple tables")
+    return cli.parse_args()
+
+
+def get_counts(bamfile):
+    """
+    Count the alignments on every reference of an AlignmentFile object.
+
+    Returns a defaultdict(int) that maps each name of bamfile.references to
+    bamfile.count(reference=name). Whether these are sense, antisense, or
+    both sense and antisense alignments depends on the pre-treatment
+    performed by sambamba in the xml wrapper.
+    """
+    per_reference = defaultdict(int)
+    per_reference.update(
+        (name, bamfile.count(reference=name)) for name in bamfile.references
+    )
+    return per_reference
+
+
+def writetable(diclist, labels, output, number):
+    '''
+    Write the count dictionaries of diclist as tab-separated tables.
+
+    diclist is a list of count dictionaries (gene -> count) and labels holds
+    one column header per dictionary.
+    With number == "unique", a single file outputdir/table.tabular is written
+    with one row per gene and one count column per dictionary. Any other
+    value writes one two-column file outputdir/table<i>.tabular per
+    dictionary. Rows are sorted by gene name.
+    output is kept in the signature for the callers but is not used here:
+    the destination paths are fixed.
+    '''
+    if number == "unique":
+        # Rows are keyed on the union of the genes so that every count stays
+        # on the row of its own gene, even when the dictionaries do not hold
+        # exactly the same keys; a gene missing from a dictionary counts 0.
+        genes = sorted(set().union(*diclist))
+        with open("outputdir/table.tabular", "w") as out:
+            out.write("gene\t%s\n" % "\t".join(labels))
+            for gene in genes:
+                row = [str(gene)]
+                row.extend(str(dic.get(gene, 0)) for dic in diclist)
+                out.write("%s\n" % "\t".join(row))
+    else:
+        for i, (dic, label) in enumerate(zip(diclist, labels)):
+            # "with" closes each table even when a write fails
+            with open("outputdir/table" + str(i) + ".tabular", "w") as out:
+                out.write("gene\t%s\n" % label)
+                for gene in sorted(dic):
+                    out.write("%s\t%s\n" % (gene, dic[gene]))
+
+
+def main(alignments, labels, output, number):
+    """
+    Count the alignments of each bam file and write the count table(s).
+
+    alignments is an iterable of bam file paths; labels, output and number
+    are handed over unchanged to writetable().
+    """
+    diclist = []
+    for path in alignments:
+        # the context manager releases the bam handle as soon as it is counted
+        with pysam.AlignmentFile(path, 'rb') as bam_object:
+            diclist.append(get_counts(bam_object))
+    writetable(diclist, labels, output, number)
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line, then run the counting.
+    cli_args = Parser()
+    main(alignments=cli_args.alignments, labels=cli_args.labels,
+         output=cli_args.output, number=cli_args.number)