comparison conifer_functions.py @ 26:cbe4551cdc27 draft

Uploaded
author bzonnedda
date Thu, 06 Jul 2017 06:28:47 -0400
parents 9c5a978ce25d
children 629681116b5f
comparison
equal deleted inserted replaced
25:abdc05fae16e 26:cbe4551cdc27
23 # along with this program. If not, see <http://www.gnu.org/licenses/>. 23 # along with this program. If not, see <http://www.gnu.org/licenses/>.
24 ####################################################################### 24 #######################################################################
25 ####################################################################### 25 #######################################################################
26 26
27 import csv 27 import csv
28 #from tables import * 28 from tables import *
29 import numpy as np 29 import numpy as np
30 import operator 30 import operator
31 31
32 class rpkm_value(IsDescription): 32 class rpkm_value(IsDescription):
33 probeID = UInt32Col(pos=0) 33 probeID = UInt32Col(pos=0)
77 class sample(IsDescription): 77 class sample(IsDescription):
78 sampleID = StringCol(100,pos=0) # 20-char string (sampleID) 78 sampleID = StringCol(100,pos=0) # 20-char string (sampleID)
79 79
80 def loadProbeList(CF_probe_filename): 80 def loadProbeList(CF_probe_filename):
81 # Load data files 81 # Load data files
82 probefile = open(CF_probe_filename, 'rb') 82 probefile = open(CF_probe_filename)
83 s = csv.Sniffer() 83 s = csv.Sniffer()
84 header = s.has_header(probefile.read(1024)) 84 header = s.has_header(probefile.read(1024))
85 probefile.seek(0) 85 probefile.seek(0)
86 dialect = s.sniff(probefile.read(1024)) 86 dialect = s.sniff(probefile.read(1024))
87 probefile.seek(0) 87 probefile.seek(0)
143 143
144 def plotGenomicCoords(plt, rpkm_data,fontsize=10,rotation=0): 144 def plotGenomicCoords(plt, rpkm_data,fontsize=10,rotation=0):
145 import operator 145 import operator
146 import locale 146 import locale
147 exon_set = rpkm_data.exons 147 exon_set = rpkm_data.exons
148 genomic_coords = np.array(map(operator.itemgetter("start"),exon_set)) 148 genomic_coords = np.array(list(map(operator.itemgetter("start"),exon_set)))
149 149
150 ticks = range(0,len(exon_set),len(exon_set)/5) 150 ticks = list(range(0,len(exon_set),len(exon_set)/5))
151 151
152 ticks[-1] -= 1 # the last tick is going to be off the chart, so we estimate it as the second to last genomic coord. 152 ticks[-1] -= 1 # the last tick is going to be off the chart, so we estimate it as the second to last genomic coord.
153 labels = [locale.format("%d", genomic_coords[i], grouping=True) for i in ticks if i < len(genomic_coords)] 153 labels = [locale.format("%d", genomic_coords[i], grouping=True) for i in ticks if i < len(genomic_coords)]
154 if rotation != 0: 154 if rotation != 0:
155 ha = "right" 155 ha = "right"
175 def mergeCalls(calls): 175 def mergeCalls(calls):
176 if len(calls) == 0: 176 if len(calls) == 0:
177 return [] 177 return []
178 178
179 out_calls = [] 179 out_calls = []
180 calls=np.array(calls)[np.argsort(np.array(map(operator.itemgetter("start"),calls),dtype=np.int))] 180 calls=np.array(calls)[np.argsort(np.array(list(map(operator.itemgetter("start"),calls)),dtype=np.int))]
181 pstart = calls[0]["start"] 181 pstart = calls[0]["start"]
182 pstop = calls[0]["stop"] 182 pstop = calls[0]["stop"]
183 for d in calls: 183 for d in calls:
184 if d["start"] <= pstop: 184 if d["start"] <= pstop:
185 pstop = max(d["stop"],pstop) 185 pstop = max(d["stop"],pstop)
200 self.calls = [] 200 self.calls = []
201 self.refined_calls = [] 201 self.refined_calls = []
202 202
203 def smooth(self, window = 15, padded = False): #todo, fix the padding here 203 def smooth(self, window = 15, padded = False): #todo, fix the padding here
204 if self.isGenotype: 204 if self.isGenotype:
205 print "Warning: the data in this rpkm_data container are single genotype values. Smoothing will have no effect!" 205 print("Warning: the data in this rpkm_data container are single genotype values. Smoothing will have no effect!")
206 return self.rpkm 206 return self.rpkm
207 207
208 if window > 0: 208 if window > 0:
209 weightings=np.blackman(window) 209 weightings=np.blackman(window)
210 weightings = weightings/weightings.sum() 210 weightings = weightings/weightings.sum()
247 class rpkm_reader: 247 class rpkm_reader:
248 def __init__(self, rpkm_fn=None): 248 def __init__(self, rpkm_fn=None):
249 """Initialize an rpkm_reader instance. Specify the location of the data file""" 249 """Initialize an rpkm_reader instance. Specify the location of the data file"""
250 250
251 if rpkm_fn == None: 251 if rpkm_fn == None:
252 print "Must specify RPKM HDF5 file!" 252 print("Must specify RPKM HDF5 file!")
253 return 0 253 return 0
254 # set up file access 254 # set up file access
255 self.h5file = openFile(rpkm_fn, mode='r') 255 self.h5file = openFile(rpkm_fn, mode='r')
256 self.sample_table = self.h5file.root.samples.samples 256 self.sample_table = self.h5file.root.samples.samples
257 257