diff small_rna_map.py @ 7:35d3f8ac99cf draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 975e2303944d19a4124210741e5a0d1feb546e45
author artbio
date Sun, 23 Jul 2017 05:21:58 -0400
parents f924a33e1eef
children 2cc2948cfa34
line wrap: on
line diff
--- a/small_rna_map.py	Sun Jul 23 03:43:40 2017 -0400
+++ b/small_rna_map.py	Sun Jul 23 05:21:58 2017 -0400
@@ -22,7 +22,7 @@
 
 class Map:
 
-    def __init__(self, bam_file, sample):
+    def __init__(self, bam_file, sample, computeSize=False):
         self.sample_name = sample
         self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
         self.chromosomes = dict(zip(self.bam_object.references,
@@ -32,7 +32,8 @@
         self.mean = self.compute_mean(self.map_dict)
         self.median = self.compute_median(self.map_dict)
         self.coverage = self.compute_coverage(self.map_dict)
-        self.size = self.compute_size(self.map_dict)
+        if computeSize:
+            self.size = self.compute_size(self.map_dict)
 
     def create_map(self, bam_object):
         '''
@@ -119,15 +120,15 @@
     def compute_size(self, map_dictionary):
         '''
         Takes a map_dictionary and returns a dictionary of sizes:
-        {chrom: {size: {polarity: nbre of reads}}}
+        {chrom: {polarity: {size: nbre of reads}}}
         '''
         size_dictionary = defaultdict(lambda: defaultdict(
                                       lambda: defaultdict( int )))
+        #  to track empty chromosomes
+        for chrom in self.chromosomes:
+            if self.bam_object.count(chrom) == 0:
+                size_dictionary[chrom]['F'][10] = 0
         for key in map_dictionary:
-            if len(map_dictionary) == 0:
-                #  to track empty chromosomes
-                size_dictionary[key[0]][key[2]][size] = 0
-                continue
             for size in map_dictionary[key]:
                 size_dictionary[key[0]][key[2]][size] += 1
         return size_dictionary
@@ -173,19 +174,21 @@
         Fs = open(size_file_out, 'w')
         header = ["Dataset", "Chromosome", "Polarity", "Size", "Nbr_reads"]
         Fs.write('\t'.join(header) + '\n')
-    for file, sample in zip(inputs, samples):
-        mapobj = Map(file, sample)
-        mapobj.write_table(F)
-        if size_file_out:
+        for file, sample in zip(inputs, samples):
+            mapobj = Map(file, sample, computeSize=True)
+            mapobj.write_table(F)
             mapobj.write_size_table(Fs)
-    F.close()
-    if size_file_out:
         Fs.close()
+    else:
+        for file, sample in zip(inputs, samples):
+            mapobj = Map(file, sample, computeSize=False)
+            mapobj.write_table(F)
+        F.close()
 
 
 if __name__ == "__main__":
     args = Parser()
-    # if identical sample names # to be tested
+    # if identical sample names
     if len(set(args.sample_name)) != len(args.sample_name):
         args.sample_name = [name + '_' + str(i) for
                             i, name in enumerate(args.sample_name)]