comparison small_rna_map.py @ 7:35d3f8ac99cf draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 975e2303944d19a4124210741e5a0d1feb546e45
author artbio
date Sun, 23 Jul 2017 05:21:58 -0400
parents f924a33e1eef
children 2cc2948cfa34
comparison
equal deleted inserted replaced
6:f924a33e1eef 7:35d3f8ac99cf
20 return args 20 return args
21 21
22 22
23 class Map: 23 class Map:
24 24
25 def __init__(self, bam_file, sample): 25 def __init__(self, bam_file, sample, computeSize=False):
26 self.sample_name = sample 26 self.sample_name = sample
27 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') 27 self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
28 self.chromosomes = dict(zip(self.bam_object.references, 28 self.chromosomes = dict(zip(self.bam_object.references,
29 self.bam_object.lengths)) 29 self.bam_object.lengths))
30 self.map_dict = self.create_map(self.bam_object) 30 self.map_dict = self.create_map(self.bam_object)
31 self.max = self.compute_max(self.map_dict) 31 self.max = self.compute_max(self.map_dict)
32 self.mean = self.compute_mean(self.map_dict) 32 self.mean = self.compute_mean(self.map_dict)
33 self.median = self.compute_median(self.map_dict) 33 self.median = self.compute_median(self.map_dict)
34 self.coverage = self.compute_coverage(self.map_dict) 34 self.coverage = self.compute_coverage(self.map_dict)
35 self.size = self.compute_size(self.map_dict) 35 if computeSize:
36 self.size = self.compute_size(self.map_dict)
36 37
37 def create_map(self, bam_object): 38 def create_map(self, bam_object):
38 ''' 39 '''
39 Returns a map_dictionary {(chromosome,read_position,polarity): 40 Returns a map_dictionary {(chromosome,read_position,polarity):
40 [read_length, ...]} 41 [read_length, ...]}
117 return coverage_dictionary 118 return coverage_dictionary
118 119
119 def compute_size(self, map_dictionary): 120 def compute_size(self, map_dictionary):
120 ''' 121 '''
121 Takes a map_dictionary and returns a dictionary of sizes: 122 Takes a map_dictionary and returns a dictionary of sizes:
122 {chrom: {size: {polarity: nbre of reads}}} 123 {chrom: {polarity: {size: nbre of reads}}}
123 ''' 124 '''
124 size_dictionary = defaultdict(lambda: defaultdict( 125 size_dictionary = defaultdict(lambda: defaultdict(
125 lambda: defaultdict( int ))) 126 lambda: defaultdict( int )))
127 # to track empty chromosomes
128 for chrom in self.chromosomes:
129 if self.bam_object.count(chrom) == 0:
130 size_dictionary[chrom]['F'][10] = 0
126 for key in map_dictionary: 131 for key in map_dictionary:
127 if len(map_dictionary) == 0:
128 # to track empty chromosomes
129 size_dictionary[key[0]][key[2]][size] = 0
130 continue
131 for size in map_dictionary[key]: 132 for size in map_dictionary[key]:
132 size_dictionary[key[0]][key[2]][size] += 1 133 size_dictionary[key[0]][key[2]][size] += 1
133 return size_dictionary 134 return size_dictionary
134 135
135 def write_size_table(self, out): 136 def write_size_table(self, out):
171 F.write('\t'.join(header) + '\n') 172 F.write('\t'.join(header) + '\n')
172 if size_file_out: 173 if size_file_out:
173 Fs = open(size_file_out, 'w') 174 Fs = open(size_file_out, 'w')
174 header = ["Dataset", "Chromosome", "Polarity", "Size", "Nbr_reads"] 175 header = ["Dataset", "Chromosome", "Polarity", "Size", "Nbr_reads"]
175 Fs.write('\t'.join(header) + '\n') 176 Fs.write('\t'.join(header) + '\n')
176 for file, sample in zip(inputs, samples): 177 for file, sample in zip(inputs, samples):
177 mapobj = Map(file, sample) 178 mapobj = Map(file, sample, computeSize=True)
178 mapobj.write_table(F) 179 mapobj.write_table(F)
179 if size_file_out:
180 mapobj.write_size_table(Fs) 180 mapobj.write_size_table(Fs)
181 F.close()
182 if size_file_out:
183 Fs.close() 181 Fs.close()
182 else:
183 for file, sample in zip(inputs, samples):
184 mapobj = Map(file, sample, computeSize=False)
185 mapobj.write_table(F)
186 F.close()
184 187
185 188
186 if __name__ == "__main__": 189 if __name__ == "__main__":
187 args = Parser() 190 args = Parser()
188 # if identical sample names # to be tested 191 # if identical sample names
189 if len(set(args.sample_name)) != len(args.sample_name): 192 if len(set(args.sample_name)) != len(args.sample_name):
190 args.sample_name = [name + '_' + str(i) for 193 args.sample_name = [name + '_' + str(i) for
191 i, name in enumerate(args.sample_name)] 194 i, name in enumerate(args.sample_name)]
192 main(args.input, args.sample_name, args.output, args.sizes) 195 main(args.input, args.sample_name, args.output, args.sizes)