comparison small_rna_map.py @ 8:2cc2948cfa34 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit b6c6b4d92a8d839e59f7ac28cb169d25ca68ef0d
author artbio
date Sun, 23 Jul 2017 13:54:29 -0400
parents 35d3f8ac99cf
children
comparison
equal deleted inserted replaced
7:35d3f8ac99cf 8:2cc2948cfa34
38 def create_map(self, bam_object): 38 def create_map(self, bam_object):
39 ''' 39 '''
40 Returns a map_dictionary {(chromosome,read_position,polarity): 40 Returns a map_dictionary {(chromosome,read_position,polarity):
41 [read_length, ...]} 41 [read_length, ...]}
42 ''' 42 '''
43 map_dictionary = dict() 43 map_dictionary = defaultdict(list)
44 for chrom in self.chromosomes: 44 # get empty value for start and end of each chromosome
45 for pos in range(self.chromosomes[chrom]): 45 for chrom in self.chromosomes:
46 map_dictionary[(chrom, pos+1, 'F')] = [] # get all chromosomes 46 map_dictionary[(chrom, 1, 'F')] = []
47 map_dictionary[(chrom, pos+1, 'R')] = [] # get all chromosomes 47 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []
48 for chrom in self.chromosomes: 48 for chrom in self.chromosomes:
49 for read in bam_object.fetch(chrom): 49 for read in bam_object.fetch(chrom):
50 read_positions = read.positions # a list of covered positions 50 positions = read.positions # a list of covered positions
51 for pos in positions:
52 if not map_dictionary[(chrom, pos+1, 'F')]:
53 map_dictionary[(chrom, pos+1, 'F')] = []
54 if not map_dictionary[(chrom, pos+1, 'R')]:
55 map_dictionary[(chrom, pos+1, 'R')] = []
51 if read.is_reverse: 56 if read.is_reverse:
52 map_dictionary[(chrom, read_positions[-1]+1, 57 map_dictionary[(chrom, positions[-1]+1,
53 'R')].append(read.query_alignment_length) 58 'R')].append(read.query_alignment_length)
54 else: 59 else:
55 map_dictionary[(chrom, read_positions[0]+1, 60 map_dictionary[(chrom, positions[0]+1,
56 'F')].append(read.query_alignment_length) 61 'F')].append(read.query_alignment_length)
57 return map_dictionary 62 return map_dictionary
58 63
59 def compute_max(self, map_dictionary): 64 def compute_max(self, map_dictionary):
60 ''' 65 '''
98 median_dictionary[key] = numpy.median(map_dictionary[key]) 103 median_dictionary[key] = numpy.median(map_dictionary[key])
99 return median_dictionary 104 return median_dictionary
100 105
101 def compute_coverage(self, map_dictionary, quality=10): 106 def compute_coverage(self, map_dictionary, quality=10):
102 ''' 107 '''
103 Takes a map_dictionary and returns a dictionary of lists 108 takes a map_dictionary as input and returns
104 of coverage along the coordinates of pre_mirs (as keys) 109 a coverage_dictionary {(chromosome,read_position,polarity):
110 coverage}
105 ''' 111 '''
106 coverage_dictionary = dict() 112 coverage_dictionary = dict()
107 for chrom in self.chromosomes: 113 for chrom in self.chromosomes:
114 coverage_dictionary[(chrom, 1, 'F')] = 0
115 coverage_dictionary[(chrom, self.chromosomes[chrom], 'F')] = 0
116
117 for key in map_dictionary:
108 coverage = self.bam_object.count_coverage( 118 coverage = self.bam_object.count_coverage(
109 reference=chrom, 119 reference=key[0],
110 start=0, 120 start=key[1]-1,
111 end=self.chromosomes[chrom], 121 end=key[1],
112 quality_threshold=quality) 122 quality_threshold=quality)
113 """ Add the 4 coverage values """ 123 """ Add the 4 coverage values """
114 coverage = [sum(x) for x in zip(*coverage)] 124 coverage = [sum(x) for x in zip(*coverage)]
115 for pos, cov in enumerate(coverage): 125 coverage_dictionary[key] = coverage[0]
116 coverage_dictionary[(chrom, pos+1, "F")] = cov 126 # coverage_dictionary[(key[0], key[1], 'R')] = coverage
117 coverage_dictionary[(chrom, pos+1, "R")] = cov
118 return coverage_dictionary 127 return coverage_dictionary
119 128
120 def compute_size(self, map_dictionary): 129 def compute_size(self, map_dictionary):
121 ''' 130 '''
122 Takes a map_dictionary and returns a dictionary of sizes: 131 Takes a map_dictionary and returns a dictionary of sizes: