Mercurial > repos > artbio > small_rna_map
comparison small_rna_map.py @ 7:35d3f8ac99cf draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 975e2303944d19a4124210741e5a0d1feb546e45
| author | artbio |
|---|---|
| date | Sun, 23 Jul 2017 05:21:58 -0400 |
| parents | f924a33e1eef |
| children | 2cc2948cfa34 |
comparison
equal
deleted
inserted
replaced
| 6:f924a33e1eef | 7:35d3f8ac99cf |
|---|---|
| 20 return args | 20 return args |
| 21 | 21 |
| 22 | 22 |
| 23 class Map: | 23 class Map: |
| 24 | 24 |
| 25 def __init__(self, bam_file, sample): | 25 def __init__(self, bam_file, sample, computeSize=False): |
| 26 self.sample_name = sample | 26 self.sample_name = sample |
| 27 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') | 27 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') |
| 28 self.chromosomes = dict(zip(self.bam_object.references, | 28 self.chromosomes = dict(zip(self.bam_object.references, |
| 29 self.bam_object.lengths)) | 29 self.bam_object.lengths)) |
| 30 self.map_dict = self.create_map(self.bam_object) | 30 self.map_dict = self.create_map(self.bam_object) |
| 31 self.max = self.compute_max(self.map_dict) | 31 self.max = self.compute_max(self.map_dict) |
| 32 self.mean = self.compute_mean(self.map_dict) | 32 self.mean = self.compute_mean(self.map_dict) |
| 33 self.median = self.compute_median(self.map_dict) | 33 self.median = self.compute_median(self.map_dict) |
| 34 self.coverage = self.compute_coverage(self.map_dict) | 34 self.coverage = self.compute_coverage(self.map_dict) |
| 35 self.size = self.compute_size(self.map_dict) | 35 if computeSize: |
| 36 self.size = self.compute_size(self.map_dict) | |
| 36 | 37 |
| 37 def create_map(self, bam_object): | 38 def create_map(self, bam_object): |
| 38 ''' | 39 ''' |
| 39 Returns a map_dictionary {(chromosome,read_position,polarity): | 40 Returns a map_dictionary {(chromosome,read_position,polarity): |
| 40 [read_length, ...]} | 41 [read_length, ...]} |
| 117 return coverage_dictionary | 118 return coverage_dictionary |
| 118 | 119 |
| 119 def compute_size(self, map_dictionary): | 120 def compute_size(self, map_dictionary): |
| 120 ''' | 121 ''' |
| 121 Takes a map_dictionary and returns a dictionary of sizes: | 122 Takes a map_dictionary and returns a dictionary of sizes: |
| 122 {chrom: {size: {polarity: nbre of reads}}} | 123 {chrom: {polarity: {size: nbre of reads}}} |
| 123 ''' | 124 ''' |
| 124 size_dictionary = defaultdict(lambda: defaultdict( | 125 size_dictionary = defaultdict(lambda: defaultdict( |
| 125 lambda: defaultdict( int ))) | 126 lambda: defaultdict( int ))) |
| 127 # to track empty chromosomes | |
| 128 for chrom in self.chromosomes: | |
| 129 if self.bam_object.count(chrom) == 0: | |
| 130 size_dictionary[chrom]['F'][10] = 0 | |
| 126 for key in map_dictionary: | 131 for key in map_dictionary: |
| 127 if len(map_dictionary) == 0: | |
| 128 # to track empty chromosomes | |
| 129 size_dictionary[key[0]][key[2]][size] = 0 | |
| 130 continue | |
| 131 for size in map_dictionary[key]: | 132 for size in map_dictionary[key]: |
| 132 size_dictionary[key[0]][key[2]][size] += 1 | 133 size_dictionary[key[0]][key[2]][size] += 1 |
| 133 return size_dictionary | 134 return size_dictionary |
| 134 | 135 |
| 135 def write_size_table(self, out): | 136 def write_size_table(self, out): |
| 171 F.write('\t'.join(header) + '\n') | 172 F.write('\t'.join(header) + '\n') |
| 172 if size_file_out: | 173 if size_file_out: |
| 173 Fs = open(size_file_out, 'w') | 174 Fs = open(size_file_out, 'w') |
| 174 header = ["Dataset", "Chromosome", "Polarity", "Size", "Nbr_reads"] | 175 header = ["Dataset", "Chromosome", "Polarity", "Size", "Nbr_reads"] |
| 175 Fs.write('\t'.join(header) + '\n') | 176 Fs.write('\t'.join(header) + '\n') |
| 176 for file, sample in zip(inputs, samples): | 177 for file, sample in zip(inputs, samples): |
| 177 mapobj = Map(file, sample) | 178 mapobj = Map(file, sample, computeSize=True) |
| 178 mapobj.write_table(F) | 179 mapobj.write_table(F) |
| 179 if size_file_out: | |
| 180 mapobj.write_size_table(Fs) | 180 mapobj.write_size_table(Fs) |
| 181 F.close() | |
| 182 if size_file_out: | |
| 183 Fs.close() | 181 Fs.close() |
| 182 else: | |
| 183 for file, sample in zip(inputs, samples): | |
| 184 mapobj = Map(file, sample, computeSize=False) | |
| 185 mapobj.write_table(F) | |
| 186 F.close() | |
| 184 | 187 |
| 185 | 188 |
| 186 if __name__ == "__main__": | 189 if __name__ == "__main__": |
| 187 args = Parser() | 190 args = Parser() |
| 188 # if identical sample names # to be tested | 191 # if identical sample names |
| 189 if len(set(args.sample_name)) != len(args.sample_name): | 192 if len(set(args.sample_name)) != len(args.sample_name): |
| 190 args.sample_name = [name + '_' + str(i) for | 193 args.sample_name = [name + '_' + str(i) for |
| 191 i, name in enumerate(args.sample_name)] | 194 i, name in enumerate(args.sample_name)] |
| 192 main(args.input, args.sample_name, args.output, args.sizes) | 195 main(args.input, args.sample_name, args.output, args.sizes) |
