comparison kmer_analysis.py @ 19:385fc64fa988 draft default tip

Uploaded
author rlegendre
date Fri, 12 Jun 2015 11:32:59 -0400
parents a121cce43f90
children
comparison
equal deleted inserted replaced
18:a121cce43f90 19:385fc64fa988
208 with open(tmpdir+"/assoCov_"+chrom+".txt") as f : 208 with open(tmpdir+"/assoCov_"+chrom+".txt") as f :
209 data = f.readlines() 209 data = f.readlines()
210 except IOError : 210 except IOError :
211 print tmpdir+"/assoCov_"+chrom+".txt doesn't exist" 211 print tmpdir+"/assoCov_"+chrom+".txt doesn't exist"
212 212
213 213 try:
214 ## if a gene without intron : 214 ## if a gene without intron :
215 if GFF[gene]['exon_number'] == 1: 215 if GFF[gene]['exon_number'] == 1:
216 216
217 ## get coverage for each gene 217 ## get coverage for each gene
218 if GFF[gene]['strand'] == "+": 218 if GFF[gene]['strand'] == "+":
219 for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1): 219 for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1):
220 cov.append(int((data[i].rstrip()).split("\t")[0])) 220 cov.append(int((data[i].rstrip()).split("\t")[0]))
221 else :
222 for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1):
223 cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
224 cov.reverse()
221 else : 225 else :
222 for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1): 226 ## For each gene, get coverage and sum of exon size
223 cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-",""))) 227 if GFF[gene]['strand'] == "+":
224 cov.reverse()
225 else :
226 ## For each gene, get coverage and sum of exon size
227 if GFF[gene]['strand'] == "+":
228
229 for exon in range(1,GFF[gene]['exon_number']+1) :
230 for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
231 #if i <= GFF[gene]['stop'] :
232 cov.append(int((data[i].rstrip()).split("\t")[0]))
233 else :
234
235 for exon in range(1,GFF[gene]['exon_number']+1) :
236 for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
237 #if i <= GFF[gene]['start'] :
238 cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
239 cov.reverse()
240 228
229 for exon in range(1,GFF[gene]['exon_number']+1) :
230 for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
231 #if i <= GFF[gene]['stop'] :
232 cov.append(int((data[i].rstrip()).split("\t")[0]))
233 else :
234
235 for exon in range(1,GFF[gene]['exon_number']+1) :
236 for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
237 #if i <= GFF[gene]['start'] :
238 cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
239 cov.reverse()
240 except :
241 #print gene+" could not be analysed."
242 #del GFF[gene]
243 continue
241 len_cov = len(cov) 244 len_cov = len(cov)
242 prop = [0,0,0] 245 prop = [0,0,0]
243 for nuc in range(0,len_cov-2,3) : 246 for nuc in range(0,len_cov-2,3) :
244 ## Calculate proportion 247 ## Calculate proportion
245 prop[0] += cov[nuc] 248 prop[0] += cov[nuc]