Mercurial > repos > artbio > small_rna_map
comparison small_rna_map.py @ 7:35d3f8ac99cf draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 975e2303944d19a4124210741e5a0d1feb546e45
author | artbio |
---|---|
date | Sun, 23 Jul 2017 05:21:58 -0400 |
parents | f924a33e1eef |
children | 2cc2948cfa34 |
comparison
equal
deleted
inserted
replaced
6:f924a33e1eef | 7:35d3f8ac99cf |
---|---|
20 return args | 20 return args |
21 | 21 |
22 | 22 |
23 class Map: | 23 class Map: |
24 | 24 |
25 def __init__(self, bam_file, sample): | 25 def __init__(self, bam_file, sample, computeSize=False): |
26 self.sample_name = sample | 26 self.sample_name = sample |
27 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') | 27 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') |
28 self.chromosomes = dict(zip(self.bam_object.references, | 28 self.chromosomes = dict(zip(self.bam_object.references, |
29 self.bam_object.lengths)) | 29 self.bam_object.lengths)) |
30 self.map_dict = self.create_map(self.bam_object) | 30 self.map_dict = self.create_map(self.bam_object) |
31 self.max = self.compute_max(self.map_dict) | 31 self.max = self.compute_max(self.map_dict) |
32 self.mean = self.compute_mean(self.map_dict) | 32 self.mean = self.compute_mean(self.map_dict) |
33 self.median = self.compute_median(self.map_dict) | 33 self.median = self.compute_median(self.map_dict) |
34 self.coverage = self.compute_coverage(self.map_dict) | 34 self.coverage = self.compute_coverage(self.map_dict) |
35 self.size = self.compute_size(self.map_dict) | 35 if computeSize: |
36 self.size = self.compute_size(self.map_dict) | |
36 | 37 |
37 def create_map(self, bam_object): | 38 def create_map(self, bam_object): |
38 ''' | 39 ''' |
39 Returns a map_dictionary {(chromosome,read_position,polarity): | 40 Returns a map_dictionary {(chromosome,read_position,polarity): |
40 [read_length, ...]} | 41 [read_length, ...]} |
117 return coverage_dictionary | 118 return coverage_dictionary |
118 | 119 |
119 def compute_size(self, map_dictionary): | 120 def compute_size(self, map_dictionary): |
120 ''' | 121 ''' |
121 Takes a map_dictionary and returns a dictionary of sizes: | 122 Takes a map_dictionary and returns a dictionary of sizes: |
122 {chrom: {size: {polarity: nbre of reads}}} | 123 {chrom: {polarity: {size: nbre of reads}}} |
123 ''' | 124 ''' |
124 size_dictionary = defaultdict(lambda: defaultdict( | 125 size_dictionary = defaultdict(lambda: defaultdict( |
125 lambda: defaultdict( int ))) | 126 lambda: defaultdict( int ))) |
127 # to track empty chromosomes | |
128 for chrom in self.chromosomes: | |
129 if self.bam_object.count(chrom) == 0: | |
130 size_dictionary[chrom]['F'][10] = 0 | |
126 for key in map_dictionary: | 131 for key in map_dictionary: |
127 if len(map_dictionary) == 0: | |
128 # to track empty chromosomes | |
129 size_dictionary[key[0]][key[2]][size] = 0 | |
130 continue | |
131 for size in map_dictionary[key]: | 132 for size in map_dictionary[key]: |
132 size_dictionary[key[0]][key[2]][size] += 1 | 133 size_dictionary[key[0]][key[2]][size] += 1 |
133 return size_dictionary | 134 return size_dictionary |
134 | 135 |
135 def write_size_table(self, out): | 136 def write_size_table(self, out): |
171 F.write('\t'.join(header) + '\n') | 172 F.write('\t'.join(header) + '\n') |
172 if size_file_out: | 173 if size_file_out: |
173 Fs = open(size_file_out, 'w') | 174 Fs = open(size_file_out, 'w') |
174 header = ["Dataset", "Chromosome", "Polarity", "Size", "Nbr_reads"] | 175 header = ["Dataset", "Chromosome", "Polarity", "Size", "Nbr_reads"] |
175 Fs.write('\t'.join(header) + '\n') | 176 Fs.write('\t'.join(header) + '\n') |
176 for file, sample in zip(inputs, samples): | 177 for file, sample in zip(inputs, samples): |
177 mapobj = Map(file, sample) | 178 mapobj = Map(file, sample, computeSize=True) |
178 mapobj.write_table(F) | 179 mapobj.write_table(F) |
179 if size_file_out: | |
180 mapobj.write_size_table(Fs) | 180 mapobj.write_size_table(Fs) |
181 F.close() | |
182 if size_file_out: | |
183 Fs.close() | 181 Fs.close() |
182 else: | |
183 for file, sample in zip(inputs, samples): | |
184 mapobj = Map(file, sample, computeSize=False) | |
185 mapobj.write_table(F) | |
186 F.close() | |
184 | 187 |
185 | 188 |
186 if __name__ == "__main__": | 189 if __name__ == "__main__": |
187 args = Parser() | 190 args = Parser() |
188 # if identical sample names # to be tested | 191 # if identical sample names |
189 if len(set(args.sample_name)) != len(args.sample_name): | 192 if len(set(args.sample_name)) != len(args.sample_name): |
190 args.sample_name = [name + '_' + str(i) for | 193 args.sample_name = [name + '_' + str(i) for |
191 i, name in enumerate(args.sample_name)] | 194 i, name in enumerate(args.sample_name)] |
192 main(args.input, args.sample_name, args.output, args.sizes) | 195 main(args.input, args.sample_name, args.output, args.sizes) |