comparison conifer_functions.py @ 28:629681116b5f draft

Uploaded
author bzonnedda
date Fri, 07 Jul 2017 04:04:25 -0400
parents cbe4551cdc27
children 2e8281109e79
comparison
equal deleted inserted replaced
27:02091c92f4f1 28:629681116b5f
77 class sample(IsDescription): 77 class sample(IsDescription):
78 sampleID = StringCol(100,pos=0) # 20-char string (sampleID) 78 sampleID = StringCol(100,pos=0) # 20-char string (sampleID)
79 79
80 def loadProbeList(CF_probe_filename): 80 def loadProbeList(CF_probe_filename):
81 # Load data files 81 # Load data files
82 probefile = open(CF_probe_filename) 82 probefile = open(CF_probe_filename, 'rb')
83 s = csv.Sniffer() 83 s = csv.Sniffer()
84 header = s.has_header(probefile.read(1024)) 84 header = s.has_header(probefile.read(1024))
85 probefile.seek(0) 85 probefile.seek(0)
86 dialect = s.sniff(probefile.read(1024)) 86 dialect = s.sniff(probefile.read(1024))
87 probefile.seek(0) 87 probefile.seek(0)
143 143
144 def plotGenomicCoords(plt, rpkm_data,fontsize=10,rotation=0): 144 def plotGenomicCoords(plt, rpkm_data,fontsize=10,rotation=0):
145 import operator 145 import operator
146 import locale 146 import locale
147 exon_set = rpkm_data.exons 147 exon_set = rpkm_data.exons
148 genomic_coords = np.array(list(map(operator.itemgetter("start"),exon_set))) 148 genomic_coords = np.array(map(operator.itemgetter("start"),exon_set))
149 149
150 ticks = list(range(0,len(exon_set),len(exon_set)/5)) 150 ticks = range(0,len(exon_set),len(exon_set)/5)
151 151
152 ticks[-1] -= 1 # the last tick is going to be off the chart, so we estimate it as the second to last genomic coord. 152 ticks[-1] -= 1 # the last tick is going to be off the chart, so we estimate it as the second to last genomic coord.
153 labels = [locale.format("%d", genomic_coords[i], grouping=True) for i in ticks if i < len(genomic_coords)] 153 labels = [locale.format("%d", genomic_coords[i], grouping=True) for i in ticks if i < len(genomic_coords)]
154 if rotation != 0: 154 if rotation != 0:
155 ha = "right" 155 ha = "right"
175 def mergeCalls(calls): 175 def mergeCalls(calls):
176 if len(calls) == 0: 176 if len(calls) == 0:
177 return [] 177 return []
178 178
179 out_calls = [] 179 out_calls = []
180 calls=np.array(calls)[np.argsort(np.array(list(map(operator.itemgetter("start"),calls)),dtype=np.int))] 180 calls=np.array(calls)[np.argsort(np.array(map(operator.itemgetter("start"),calls),dtype=np.int))]
181 pstart = calls[0]["start"] 181 pstart = calls[0]["start"]
182 pstop = calls[0]["stop"] 182 pstop = calls[0]["stop"]
183 for d in calls: 183 for d in calls:
184 if d["start"] <= pstop: 184 if d["start"] <= pstop:
185 pstop = max(d["stop"],pstop) 185 pstop = max(d["stop"],pstop)
200 self.calls = [] 200 self.calls = []
201 self.refined_calls = [] 201 self.refined_calls = []
202 202
203 def smooth(self, window = 15, padded = False): #todo, fix the padding here 203 def smooth(self, window = 15, padded = False): #todo, fix the padding here
204 if self.isGenotype: 204 if self.isGenotype:
205 print("Warning: the data in this rpkm_data container are single genotype values. Smoothing will have no effect!") 205 print "Warning: the data in this rpkm_data container are single genotype values. Smoothing will have no effect!"
206 return self.rpkm 206 return self.rpkm
207 207
208 if window > 0: 208 if window > 0:
209 weightings=np.blackman(window) 209 weightings=np.blackman(window)
210 weightings = weightings/weightings.sum() 210 weightings = weightings/weightings.sum()
247 class rpkm_reader: 247 class rpkm_reader:
248 def __init__(self, rpkm_fn=None): 248 def __init__(self, rpkm_fn=None):
249 """Initialize an rpkm_reader instance. Specify the location of the data file""" 249 """Initialize an rpkm_reader instance. Specify the location of the data file"""
250 250
251 if rpkm_fn == None: 251 if rpkm_fn == None:
252 print("Must specify RPKM HDF5 file!") 252 print "Must specify RPKM HDF5 file!"
253 return 0 253 return 0
254 # set up file access 254 # set up file access
255 self.h5file = openFile(rpkm_fn, mode='r') 255 # self.h5file = openFile(rpkm_fn, mode='r')
256 self.h5file = open_file(rpkm_fn, mode='r')
256 self.sample_table = self.h5file.root.samples.samples 257 self.sample_table = self.h5file.root.samples.samples
257 258
258 def __del__(self): 259 def __del__(self):
259 self.h5file.close() 260 self.h5file.close()
260 261