6
|
1 #!/usr/bin/env python2.7.3
|
|
2 #-*- coding: utf-8 -*-
|
|
3
|
|
4 '''
|
|
5 Created on Dec. 2013
|
|
6 @author: rachel legendre
|
|
7 @copyright: rachel.legendre@igmors.u-psud.fr
|
|
8 @license: GPL v3
|
|
9 '''
|
|
10
|
|
11 import os, sys, time, optparse, shutil, re, urllib, subprocess, tempfile
|
|
12 from urllib import unquote
|
|
13 from Bio import SeqIO
|
|
14 import csv
|
|
15 import pysam
|
|
16 import HTSeq
|
|
17 #from matplotlib import pyplot as pl
|
|
18 import matplotlib
|
|
19 matplotlib.use('Agg')
|
|
20 import matplotlib.pyplot as pl
|
|
21 from numpy import arange
|
|
22 from matplotlib import ticker as t
|
|
23 from PIL import Image
|
|
24
|
|
25 def stop_err( msg ):
|
|
26 sys.stderr.write( "%s\n" % msg )
|
|
27 sys.stderr.write( "Programme aborted at %s\n" % time.asctime(time.localtime(time.time())))
|
|
28 sys.exit()
|
|
29
|
|
30
|
|
31 def store_gff(gff):
|
|
32 '''
|
|
33 parse and store gff file in a dictionnary
|
|
34 '''
|
|
35 try:
|
|
36 GFF = {}
|
|
37 with open(gff, 'r') as f_gff :
|
|
38 GFF['order'] = []
|
|
39 for line in f_gff:
|
|
40 ## switch commented lines
|
|
41 line = line.split("#")[0]
|
|
42 if line != "" :
|
|
43 feature = (line.split('\t')[8]).split(';')
|
|
44 # first line is already gene line :
|
|
45 if line.split('\t')[2] == 'gene' :
|
|
46 gene = feature[0].replace("ID=","")
|
|
47 if re.search('gene',feature[2]) :
|
|
48 Name = feature[2].replace("gene=","")
|
|
49 else :
|
|
50 Name = "Unknown"
|
|
51 ##get annotation
|
|
52 note = re.sub(r".+\;Note\=(.+)\;display\=.+", r"\1", line)
|
|
53 note = unquote(str(note)).replace("\n","")
|
|
54 ## store gene information
|
|
55 GFF['order'].append(gene)
|
|
56 GFF[gene] = {}
|
|
57 GFF[gene]['chrom'] = line.split('\t')[0]
|
|
58 GFF[gene]['start'] = int(line.split('\t')[3])
|
|
59 GFF[gene]['stop'] = int(line.split('\t')[4])
|
|
60 GFF[gene]['strand'] = line.split('\t')[6]
|
|
61 GFF[gene]['name'] = Name
|
|
62 GFF[gene]['note'] = note
|
|
63 GFF[gene]['exon'] = {}
|
|
64 GFF[gene]['exon_number'] = 0
|
|
65 #print Name
|
|
66 elif line.split('\t')[2] == 'CDS' :
|
|
67 gene = re.sub(r".?Parent\=(.+)(_mRNA)+", r"\1", feature[0])
|
|
68 if GFF.has_key(gene) :
|
|
69 GFF[gene]['exon_number'] += 1
|
|
70 exon_number = GFF[gene]['exon_number']
|
|
71 GFF[gene]['exon'][exon_number] = {}
|
|
72 GFF[gene]['exon'][exon_number]['frame'] = line.split('\t')[7]
|
|
73 GFF[gene]['exon'][exon_number]['start'] = int(line.split('\t')[3])
|
|
74 GFF[gene]['exon'][exon_number]['stop'] = int(line.split('\t')[4])
|
|
75
|
|
76 ## if there is a five prim UTR intron, we change start of gene
|
|
77 elif line.split('\t')[2] == 'five_prime_UTR_intron' :
|
|
78 if GFF[gene]['strand'] == "+" :
|
|
79 GFF[gene]['start'] = GFF[gene]['exon'][1]['start']
|
|
80 else :
|
|
81 GFF[gene]['stop'] = GFF[gene]['exon'][exon_number]['stop']
|
|
82 return GFF
|
|
83 except Exception,e:
|
|
84 stop_err( 'Error during gff storage : ' + str( e ) )
|
|
85
|
|
86
|
|
87
|
|
88
|
|
89 def compute_rpkm(length,count_gene,count_tot) :
|
|
90
|
|
91 try :
|
|
92 rpkm = "{0:.4f}".format(count_gene*1000000.0/(count_tot*length))
|
|
93 except ArithmeticError :
|
|
94 stop_err( 'Illegal division by zero')
|
|
95 return float(rpkm)
|
|
96
|
|
97
|
|
98 def find_stop(seq) :
|
|
99 '''
|
|
100 Find stop codon in a sequence and return position of first nucleotide in stop
|
|
101 '''
|
|
102 stop_codon = ['TAA','TAG','TGA']
|
|
103 for nt in range(0,len(seq)-3,3) :
|
|
104 codon = seq[nt:nt+3]
|
|
105 if codon in stop_codon :
|
|
106 return nt
|
|
107
|
|
108 return -1
|
|
109
|
|
110
|
|
111 def check_met(seq):
|
|
112 '''
|
|
113 Boolean function for testing presence or absence of methionine in 5 codons following stop codon
|
|
114 '''
|
|
115 met = 'ATG'
|
|
116 for pos in range(0,15,3) :
|
|
117 codon = seq[pos:pos+3]
|
|
118 if codon in met :
|
|
119 return True
|
|
120
|
|
121 return False
|
|
122
|
|
123
|
|
124 '''
|
|
125 feature.iv is a GenomicInterval object :
|
|
126 A GenomicInterval object has the following slots, some of which
|
|
127 are calculated from the other:
|
|
128
|
|
129 chrom: The name of a sequence (i.e., chromosome, contig, or
|
|
130 the like).
|
|
131 start: The start of the interval. Even on the reverse strand,
|
|
132 this is always the smaller of the two values 'start' and 'end'.
|
|
133 Note that all positions should be given as 0-based value!
|
|
134 end: The end of the interval. Following Python convention for
|
|
135 ranges, this in one more than the coordinate of the last base
|
|
136 that is considered part of the sequence.
|
|
137 strand: The strand, as a single character, '+' or '-'. '.' indicates
|
|
138 that the strand is irrelavant. (Alternatively, pass a Strand object.)
|
|
139 length: The length of the interval, i.e., end - start
|
|
140 start_d: The "directional start" position. This is the position of the
|
|
141 first base of the interval, taking the strand into account. Hence,
|
|
142 this is the same as 'start' except when strand == '-', in which
|
|
143 case it is end-1.
|
|
144 end_d: The "directional end": Usually, the same as 'end', but for
|
|
145 strand=='-1', it is start-1.
|
|
146
|
|
147 '''
|
|
148 def check_overlapping(gff_reader,chrm,start,stop,strand):
|
|
149
|
|
150 #### probleme avec les genes completement inclu...
|
|
151
|
|
152 iv2 = HTSeq.GenomicInterval(chrm,start,stop,strand)
|
|
153 for feature in gff_reader:
|
|
154 if feature.type == "gene" :
|
|
155 if feature.iv.overlaps(iv2) :
|
|
156 ## if its a reverse gene, we replace start of extension by start of previous gene
|
|
157 if strand == '-' :
|
|
158 return (feature.iv.end+3,stop)
|
|
159 ## else we replace stop of extension by start of following gene
|
|
160 else :
|
|
161 return (start,feature.iv.start-3)
|
|
162 ## if no overlap are find, we return -1
|
|
163 return (start,stop)
|
|
164
|
|
165
|
|
166 def pass_length(start,stop) :
|
|
167
|
|
168 if (stop-start) > 25 :
|
|
169 return True
|
|
170 else :
|
|
171 return False
|
|
172
|
|
173
|
|
174 def check_homo_coverage(gene,GFF,start,stop, aln) :
|
|
175
|
|
176 chrom = GFF[gene]['chrom']
|
|
177 ## compute nunber of nucleotides in CDS with a coverage equal to zero
|
|
178 nt_cds_cov = 0
|
|
179 nt_cds_num = 0
|
|
180 for i in range(1,GFF[gene]['exon_number']+1) :
|
|
181 for z in range(GFF[gene]['exon'][i]['start'],GFF[gene]['exon'][i]['stop']):
|
|
182 nt_cds_num += 1
|
|
183 if aln.count(chrom,z,z+1) == 0 :
|
|
184 nt_cds_cov += 1
|
|
185
|
|
186 ## compute percent of CDS no covering
|
|
187 percent = nt_cds_cov*100/nt_cds_num
|
|
188
|
|
189 ## compute number of nucleotides with no coverage in extension
|
|
190 nt_no_cover = 0
|
|
191 for pos in range(start,stop,1) :
|
|
192 if aln.count(chrom,pos,pos+1) == 0 :
|
|
193 nt_no_cover += 1
|
|
194 #print gene, nt_cds_cov, percent, nt_no_cover
|
|
195 #percent10 = (stop-start)*50/100
|
|
196 if (nt_no_cover*100)/(stop-start) > percent :
|
|
197 return False
|
|
198 else :
|
|
199 return True
|
|
200
|
|
201 def plot_gene ( aln, gene, start_extension, stop_extension, dirout ) :
|
|
202
|
|
203 try:
|
|
204 strand = gene['strand']
|
|
205 len_gene = gene['stop']-gene['start']
|
|
206 if strand is "-" :
|
|
207 len_ext = gene['stop']-start_extension
|
|
208 ##Â coverage in all gene + extension
|
|
209 start = start_extension-100
|
|
210 stop = gene['stop']+100
|
|
211 vector1 = [0]*(stop-start)
|
|
212 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop):
|
|
213 # vector.append(pileupcolumn.n)
|
|
214 for read in aln.fetch(gene['chrom'], start, stop):
|
|
215 if read.is_reverse :
|
|
216 ## for get footprint in P-site (estimate) we take 3 nt in middle of read
|
|
217 #pos = (read.pos+13)-start
|
|
218 pos = (read.pos-start) + (read.rlen/2)-1
|
|
219 for z in range(pos,(pos+3)) :
|
|
220 ## le fetch contient des reads qui chevauchent les 30 nt de la fin du gene mais dont le site P
|
|
221 ## se trouve avant notre vector, le z devient negatif et la couverture augmente en fin de vecteur (ce qui est faux)
|
|
222 if z > 0 :
|
|
223 try :
|
|
224 vector1[z] += 1
|
|
225 except IndexError :
|
|
226 pass
|
|
227 vector1.reverse()
|
|
228 mean_read = float(sum(vector1))/float(len(vector1))
|
|
229 cov = [(x/mean_read) for x in vector1]
|
|
230 idx_tot = arange(len(cov))
|
|
231 ## coverage in extension
|
|
232 start_ext = start_extension-40
|
|
233 stop_ext = stop_extension+30
|
|
234 vector2 = [0]*(stop_ext-start_ext)
|
|
235 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop):
|
|
236 # vector.append(pileupcolumn.n)
|
|
237 for read in aln.fetch(gene['chrom'], start_extension, stop_ext):
|
|
238 if read.is_reverse :
|
|
239 ## get footprint in P-site
|
|
240 #pos = (read.pos+13)-start_ext
|
|
241 pos = (read.pos-start_ext) + (read.rlen/2)-1
|
|
242 for z in range(pos,(pos+3)) :
|
|
243 if z > 0 :
|
|
244 try :
|
|
245 vector2[z] += 1
|
|
246 except IndexError :
|
|
247 pass
|
|
248 vector2.reverse()
|
|
249 #mean_read = float(sum(vector))/float(len(vector))
|
|
250 cov_ext = [(x/mean_read) for x in vector2]
|
|
251 _max = max(cov_ext[30::])
|
|
252 idx_ext = arange(len(cov_ext))
|
|
253
|
|
254 else :
|
|
255 len_ext = stop_extension-gene['start']
|
|
256 start = gene['start']-100
|
|
257 stop = stop_extension+100
|
|
258 vector = [0]*(stop-start)
|
|
259 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop):
|
|
260 #vector.append(pileupcolumn.n)
|
|
261 for read in aln.fetch(gene['chrom'], start, stop):
|
|
262 if not read.is_reverse :
|
|
263 ## get footprint in P-site
|
|
264 #pos = (read.pos+12)-start
|
|
265 pos = (read.pos-start) + (read.rlen/2)-1
|
|
266 for z in range(pos,(pos+3)) :
|
|
267 if z > 0 :
|
|
268 try :
|
|
269 vector[z] += 1
|
|
270 except IndexError :
|
|
271 pass
|
|
272 mean_read = float(sum(vector))/float(len(vector))
|
|
273 cov = [(x/mean_read) for x in vector]
|
|
274 idx_tot = arange(len(cov))
|
|
275
|
|
276 ## coverage in extension
|
|
277 start_ext = gene['stop']-30
|
|
278 stop_ext = stop_extension+40
|
|
279 vector = [0]*(stop-start_ext)
|
|
280 for read in aln.fetch(gene['chrom'], start_ext, stop_extension):
|
|
281 if not read.is_reverse :
|
|
282 ## get footprint in P-site
|
|
283 pos = (read.pos-start_ext) + (read.rlen/2)-1
|
|
284 for z in range(pos,(pos+3)) :
|
|
285 if z > 0 :
|
|
286 try :
|
|
287 vector[z] += 1
|
|
288 except IndexError :
|
|
289 pass
|
|
290
|
|
291 cov_ext = [(x/mean_read) for x in vector]
|
|
292 idx_ext = arange(len(cov_ext))
|
|
293 _max = max(cov_ext[30::])
|
|
294
|
|
295 #### PLOT FIGURE ####
|
|
296
|
|
297 font = {'family' : 'serif','color': 'grey','weight' : 'normal','size' : 16 }
|
|
298
|
|
299
|
|
300 fig = pl.figure(num=1)
|
|
301 ## create a big subplot for common y axis on two last subplot
|
|
302 ax = fig.add_subplot(2,1,2)
|
|
303 ## hide all spines
|
|
304 ax.spines['top'].set_visible(False)
|
|
305 ax.spines['bottom'].set_visible(False)
|
|
306 ax.spines['left'].set_visible(False)
|
|
307 ax.spines['right'].set_visible(False)
|
|
308 ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off')
|
|
309
|
|
310 ## plot gene structure
|
|
311 ax1 = fig.add_subplot(3,1,1)
|
|
312 ax1.set_title(gene['name'])
|
|
313 ## hide all spines
|
|
314 ax1.spines['right'].set_color('none')
|
|
315 ax1.spines['top'].set_color('none')
|
|
316 ax1.spines['left'].set_color('none')
|
|
317 ax1.spines['bottom'].set_color('none')
|
|
318 ax1.set_ylim(0,5)
|
|
319 #set xlim as second plot with 100nt switch
|
|
320 ax1.set_xlim(-100,len(idx_tot)-100)
|
|
321 ## hide ticks
|
|
322 pl.yticks([])
|
|
323 pl.xticks([])
|
|
324 ## if it's a "simple" gene
|
|
325 if gene['exon_number'] == 1 :
|
|
326 exon = arange(len_gene)
|
|
327 x = [3]*len_gene
|
|
328 ax1.plot(exon,x,color='#9B9E9C')
|
|
329 ax1.fill_between(exon,2,x,color='#9B9E9C')
|
|
330 ## if it's a gene with introns
|
|
331 else :
|
|
332 ## plot a line representing intron
|
|
333 intron = arange(len_gene)
|
|
334 y = [2.5]*len_gene
|
|
335 ax1.plot(intron,y,color='#9B9E9C')
|
|
336
|
|
337 ## plotting each intron
|
|
338 start = gene['start']
|
|
339 if strand == '+' :
|
|
340 for exon in range(1,gene['exon_number']+1,1) :
|
|
341 len_exon = gene['exon'][exon]['stop']-gene['exon'][exon]['start']
|
|
342 idx = gene['exon'][exon]['start'] - start
|
|
343 exo = arange(idx,idx+len_exon)
|
|
344 x = [3]*len_exon
|
|
345 ax1.plot(exo,x,color='#9B9E9C')
|
|
346 ax1.fill_between(exo,2,x,color='#9B9E9C')
|
|
347 else :
|
|
348 ## if it's a reverse gene we must reverse exon's position
|
|
349 start = gene['start']
|
|
350 tabF = [2.5]*len_gene
|
|
351 tabR = [2.5]*len_gene
|
|
352 for exon in range(1,gene['exon_number']+1,1) :
|
|
353 len_exon = gene['exon'][exon]['stop']-gene['exon'][exon]['start']
|
|
354 idx = gene['exon'][exon]['start'] - start
|
|
355 exo = arange(idx,idx+len_exon)
|
|
356 for z in exo :
|
|
357 tabF[z] = 3
|
|
358 tabR[z] = 2
|
|
359 tabF.reverse()
|
|
360 tabR.reverse()
|
|
361 #pl.ylim(0,5)
|
|
362 ax1.plot(tabF,color='#9B9E9C')
|
|
363 ax1.plot(tabR,color='#9B9E9C')
|
|
364 x = arange(len(tabR))
|
|
365 ax1.fill_between(x,tabF,tabR,color='#9B9E9C')
|
|
366
|
|
367 ## insert arrows (as genome browser representation)
|
|
368 yloc = 2.5
|
|
369 narrows = 20
|
|
370 exonwidth = .8
|
|
371 spread = .4 * len_gene / narrows
|
|
372 for i in range(narrows):
|
|
373 loc = (float(i) * len_gene / narrows)+ (len(idx_tot)/100)*2
|
|
374 if strand == '+' :
|
|
375 x = [loc - spread, loc, loc - spread]
|
|
376 else:
|
|
377 x = [loc - spread, loc, loc - spread]
|
|
378 y = [yloc - exonwidth / 5, yloc, yloc + exonwidth / 5]
|
|
379 ax1.plot(x, y, lw=1.4, color='#676B69')
|
|
380
|
|
381
|
|
382 # plot coverage in all gene + extension
|
|
383 ax2 = fig.add_subplot(3,1,2)
|
|
384 ## fixe limit to length of coverage for x axis
|
|
385 ax2.set_xlim(0,len(idx_tot))
|
|
386 ## fixe 4 ticks and associated labels for y axis
|
|
387 ax2.set_yticklabels(arange(0,int(max(cov)+20),int((max(cov)+20)/4)))
|
|
388 ax2.set_yticks(arange(0,int(max(cov)+20),int((max(cov)+20)/4)))
|
|
389 ### add start and stop of gene in axe in place of number
|
|
390 ax2.set_xticks([100,(100+len_gene),(100+len_ext)])
|
|
391 ax2.set_xticklabels(["{:,}".format(gene['start']),"{:,}".format(gene['stop']),""])
|
|
392 ## hide spines and any axis
|
|
393 ax2.spines['right'].set_visible(False)
|
|
394 ax2.spines['top'].set_visible(False)
|
|
395 ax2.yaxis.set_ticks_position('left')
|
|
396 ax2.xaxis.set_ticks_position('bottom')
|
|
397 ## plot and fill
|
|
398 ax2.plot(idx_tot, cov, color="#CC0011")
|
|
399 ax2.fill_between(idx_tot, 0,cov,color="#CC0011")
|
|
400 ax2.set_title("Genomic coordinates (%s,%s)"% (gene['strand'],gene['chrom']) ,fontdict={'fontsize':'small'})
|
|
401
|
|
402 # plot zoom coverage in extension
|
|
403 ax3 = fig.add_subplot(3,1,3)
|
|
404 ## fixe limit to length of coverage for x axis
|
|
405 ax3.set_ylim(0,int(_max))
|
|
406 # we hide spines
|
|
407 ax3.spines['right'].set_visible(False)
|
|
408 ax3.spines['top'].set_visible(False)
|
|
409 ax3.yaxis.set_ticks_position('left')
|
|
410 ax3.xaxis.set_ticks_position('bottom')
|
|
411 ##Â add stop and in-frame stop in x axis
|
|
412 pl.xticks([30,( stop_extension-start_extension)+30],["stop codon","next in-frame stop"])
|
|
413 ## fixe good position for labels
|
|
414 (ax3.get_xticklabels()[0]).set_horizontalalignment('center')
|
|
415 (ax3.get_xticklabels()[1]).set_horizontalalignment('left')
|
|
416 #if max coverage is lower than 2, we have a illegal division by zero
|
|
417 if _max > 2 :
|
|
418 ax3.set_yticklabels(arange(0,int(_max+1),int((_max+1)/3)))
|
|
419 ax3.set_yticks(arange(0,int(_max+1),int((_max+1)/3)))
|
|
420 else :
|
|
421 ##
|
|
422 ax3.set_ylim(0,_max)
|
|
423 ax3.ticklabel_format(style='sci', scilimits=(-2,2), axis='y',useOffset=False)
|
|
424
|
|
425 ax3.plot(idx_ext, cov_ext, color="#CC0011")
|
|
426 ax3.fill_between(idx_ext, 0,cov_ext,color="#CC0011")
|
|
427
|
|
428
|
|
429 ## get scale of subplot 3
|
|
430 #ax3.text(ax3.get_xlim()[1]-50,ax3.get_ylim()[1]-1, r'50 nt', fontdict=font)
|
|
431 #pl.arrow( ax3.get_xlim()[1]-50, ax3.get_ylim()[1]-2, ax3.get_xlim()[1]-50, 0, fc="grey", ec="grey",lw=2)
|
|
432
|
|
433 ## set common y label
|
|
434 ax.set_ylabel('Normalized footprint counts',labelpad=20)
|
|
435
|
|
436 ## draw and save plot
|
|
437 pl.draw()
|
|
438 #pl.show()
|
|
439 pl.savefig(dirout+".png",format='png', dpi=300)
|
|
440 pl.clf()
|
|
441
|
|
442
|
|
443 ## Make thumbnail for html page
|
|
444 infile = dirout+'.png'
|
|
445 size = 128, 128
|
|
446 im = Image.open(infile)
|
|
447 im.thumbnail(size, Image.ANTIALIAS)
|
|
448 im.save(dirout+"_thumbnail.png", "PNG")
|
|
449
|
|
450
|
|
451 except Exception, e:
|
|
452 stop_err( 'Error during gene plotting : ' + str( e ) )
|
|
453
|
|
454
|
|
455 def compute_analysis(bam, GFF, fasta, gff, dirout) :
|
|
456
|
|
457 try:
|
|
458 background_file = dirout+"/background_sequence.fasta"
|
|
459 file_back = open(background_file, 'w')
|
|
460 file_context = open(dirout+"/stop_context.fasta", 'w')
|
|
461 file_extension = open(dirout+"/extensions.fasta", 'w')
|
|
462 ## Opening Bam file with pysam librarie
|
|
463 pysam.index(bam)
|
|
464 aln = pysam.Samfile(bam, "rb",header=True, check_header = True)
|
|
465 ## Opening fasta file in a dict with BioPython
|
|
466 SeqDict = SeqIO.to_dict(SeqIO.parse(open(fasta,"r"),"fasta"))
|
|
467
|
|
468 ## count total read in bam file
|
|
469 cmd = "samtools view -c %s " % (bam)
|
|
470 proc = subprocess.Popen( args=cmd, shell=True, stdout = subprocess.PIPE)
|
|
471 count_tot = int(proc.stdout.read().rstrip())
|
|
472 returncode = proc.wait()
|
|
473
|
|
474 ## opening a GFF reader for check overlapping
|
|
475 gff_reader = HTSeq.GFF_Reader(gff)
|
|
476
|
|
477 with open(dirout+"/readthrough_result.csv","w") as out :
|
|
478 myheader = ['Gene','Name', 'FAIL', 'Stop context','chrom','start extension','stop extension','length extension','RPKM CDS', 'RPKM extension','ratio','Annotation','sequence']
|
|
479 writer = csv.writer(out,delimiter='\t')
|
|
480 writer.writerow(myheader)
|
|
481 for gene in GFF['order'] :
|
|
482 indic = ""
|
|
483 # compute rpkm of CDS :
|
|
484 len_cds = GFF[gene]['stop']-GFF[gene]['start']
|
|
485 count_cds = 0
|
|
486 ### count method of pysam doesn't strand information
|
|
487 if GFF[gene]['strand'] == '+' :
|
|
488 for track in aln.fetch(GFF[gene]['chrom'],GFF[gene]['start']+12,GFF[gene]['stop']-15) :
|
|
489 if not track.is_reverse :
|
|
490 count_cds += 1
|
|
491 else :
|
|
492 for track in aln.fetch(GFF[gene]['chrom'],GFF[gene]['start']+15,GFF[gene]['stop']-12) :
|
|
493 if track.is_reverse :
|
|
494 count_cds += 1
|
|
495 rpkm_cds = compute_rpkm(len_cds,count_cds,count_tot)
|
|
496 ## Only if gene is translate :
|
|
497 if rpkm_cds > 0 and count_cds > 128:
|
|
498 ## search footprint in UTR3
|
|
499 count = 0
|
|
500 try :
|
|
501 if GFF[gene]['strand'] == '+' :
|
|
502 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['stop']-6:GFF[gene]['stop']+6])
|
|
503 #print gene, contexte_stop
|
|
504 start_extension = GFF[gene]['stop']
|
|
505 stop_extension = GFF[gene]['stop']+90
|
|
506 for track in aln.fetch(GFF[gene]['chrom'],start_extension+15,stop_extension) :
|
|
507 if not track.is_reverse :
|
|
508 count += 1
|
|
509
|
|
510 ## get sequence of extension
|
|
511 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension])
|
|
512
|
|
513 else :
|
|
514 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['start']-7:GFF[gene]['start']+5].reverse_complement())
|
|
515 #print gene, contexte_stop
|
|
516 start_extension = GFF[gene]['start']-90
|
|
517 stop_extension = GFF[gene]['start']
|
|
518 for track in aln.fetch(GFF[gene]['chrom'],start_extension,stop_extension-15) :
|
|
519 if track.is_reverse :
|
|
520 count += 1
|
|
521 ## get sequence of extension
|
|
522 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension-1].reverse_complement())
|
|
523
|
|
524 ## if we have coverage after cds stop codon
|
|
525 if count > 10 :
|
|
526 res = find_stop(seq)
|
|
527 if res == -1 :
|
|
528 '''
|
|
529 Write results with no stop but RPF in extension
|
|
530 '''
|
|
531 ## check if next in-frame codon is far than 90 nt extension :
|
|
532 if GFF[gene]['strand'] == '+' :
|
|
533 pos = check_overlapping(gff_reader,GFF[gene]['chrom'],GFF[gene]['stop']+1,GFF[gene]['stop']+300,'+')
|
|
534 start_extension = pos[0]-1
|
|
535 stop_extension = pos[1]
|
|
536 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension])
|
|
537
|
|
538 #print gene,count,pos,'\n',seq
|
|
539
|
|
540 if (seq):
|
|
541 res = find_stop(seq)
|
|
542 if res == -1 :
|
|
543 mylist = [gene,GFF[gene]['name'],'no stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq]
|
|
544 writer.writerow(mylist)
|
|
545 else :
|
|
546 indic = 'ok'
|
|
547 #print res
|
|
548 #stop_extension = start_extension + res +3
|
|
549 else :
|
|
550 pos = check_overlapping(gff_reader,GFF[gene]['chrom'],GFF[gene]['start']-300,GFF[gene]['start']-1,'-')
|
|
551 start_extension = pos[0]
|
|
552 stop_extension = pos[1]+1
|
|
553 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension-1].reverse_complement())
|
|
554
|
|
555 #print gene,count,pos,'\n',seq
|
|
556
|
|
557 if (seq):
|
|
558 res = find_stop(seq)
|
|
559 if res == -1 :
|
|
560 mylist = [gene,GFF[gene]['name'],'no stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq]
|
|
561 writer.writerow(mylist)
|
|
562 else :
|
|
563 indic = 'ok'
|
|
564 #print res
|
|
565 #start_extension = stop_extension - res -3
|
|
566 else :
|
|
567 indic = 'ok'
|
|
568
|
|
569
|
|
570 if indic == 'ok' :
|
|
571 ## We save new coordinates
|
|
572 if GFF[gene]['strand'] == '+' :
|
|
573 stop_extension = start_extension + res +3
|
|
574 #print gene, count
|
|
575 #print gene,start_extension,stop_extension
|
|
576 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension])
|
|
577 #print seq
|
|
578 count_stop = aln.count(GFF[gene]['chrom'],stop_extension-2,stop_extension+2)
|
|
579 if pass_length(start_extension,stop_extension) :
|
|
580 count_ext = aln.count(GFF[gene]['chrom'],start_extension+9,stop_extension-15)
|
|
581 if stop_extension > GFF[gene]['stop']+9 :
|
|
582 stop_ok = 1
|
|
583 else :
|
|
584 stop_ok = 0
|
|
585 else :
|
|
586 start_extension = stop_extension - res - 3
|
|
587 #print gene, count
|
|
588 #print gene,start_extension,stop_extension
|
|
589 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension-1:stop_extension-1].reverse_complement())
|
|
590 #print seq
|
|
591 count_stop = aln.count(GFF[gene]['chrom'],start_extension-2,start_extension+2)
|
|
592 if pass_length(start_extension,stop_extension) :
|
|
593 count_ext = aln.count(GFF[gene]['chrom'],start_extension+15,stop_extension-9)
|
|
594 if start_extension < GFF[gene]['start']-9 :
|
|
595 stop_ok = 1
|
|
596 else :
|
|
597 stop_ok = 0
|
|
598 ## if we are no methionine in 5 codons following stop of CDS
|
|
599 if (not check_met(seq) ):
|
|
600 ## if we have footprint in stop codon extension and stop extension is further than cds_stop+9
|
|
601 if count_stop > 2 and stop_ok == 1 :
|
|
602 homo_cov = check_homo_coverage(gene,GFF,start_extension,stop_extension,aln)
|
|
603 if (homo_cov) :
|
|
604 '''
|
|
605 write result witch corresponding to readthrough
|
|
606 '''
|
|
607 ##if length of extension upper than 25 we can compute rpkm
|
|
608 if (not pass_length(start_extension,stop_extension)) :
|
|
609 len_ext = stop_extension-start_extension
|
|
610 rpkm_ext = 'nan'
|
|
611 ratio = 'nan'
|
|
612 else :
|
|
613 len_ext = stop_extension-start_extension
|
|
614 rpkm_ext = compute_rpkm(len_ext,count_ext,count_tot)
|
|
615 ## compute ratio between extension coverage and cds coverage (rpkm)
|
|
616 ratio = rpkm_ext/rpkm_cds
|
|
617 #print gene,ratio
|
|
618 #print start_extension,stop_extension
|
|
619 mylist = [gene,GFF[gene]['name'],'-',contexte_stop,GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,rpkm_ext,ratio,GFF[gene]['note'],seq]
|
|
620 writer.writerow(mylist)
|
|
621 file_context.write('>'+gene+'\n'+contexte_stop+'\n')
|
|
622 file_extension.write('>'+gene+'\n'+seq+'\n')
|
|
623 else :
|
|
624 '''
|
|
625 write result witch corresponding to readthrough but with no homogeneous coverage
|
|
626 '''
|
|
627 if (not pass_length(start_extension,stop_extension)) :
|
|
628 len_ext = stop_extension-start_extension
|
|
629 rpkm_ext = 'nan'
|
|
630 ratio = 'nan'
|
|
631 else :
|
|
632 len_ext = stop_extension-start_extension
|
|
633 rpkm_ext = compute_rpkm(len_ext,count_ext,count_tot)
|
|
634 ## compute ratio between extension coverage and cds coverage (rpkm)
|
|
635 ratio = rpkm_ext/rpkm_cds
|
|
636 mylist = [gene,GFF[gene]['name'],'hetero cov',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,rpkm_ext,ratio,GFF[gene]['note'],seq]
|
|
637 writer.writerow(mylist)
|
|
638 file_context.write('>'+gene+'\n'+contexte_stop+'\n')
|
|
639 file_extension.write('>'+gene+'\n'+seq+'\n')
|
|
640 #print ">"+gene+"\n"+contexte_stop
|
|
641
|
|
642 ## plot gene :
|
|
643 plot_gene(aln, GFF[gene], start_extension, stop_extension, dirout+"/"+gene)
|
|
644
|
|
645
|
|
646
|
|
647 else :
|
|
648 '''
|
|
649 write result with no footprint in stop codon of extension
|
|
650 '''
|
|
651 mylist = [gene,GFF[gene]['name'],'no RPF in stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq]
|
|
652 writer.writerow(mylist)
|
|
653 file_context.write('>'+gene+'\n'+contexte_stop+'\n')
|
|
654 file_extension.write('>'+gene+'\n'+seq+'\n')
|
|
655 #print ">"+gene+"\n"+contexte_stop
|
|
656 else :
|
|
657 '''
|
|
658 write result with RPF maybe result of reinitiation on a start codon
|
|
659 '''
|
|
660 if pass_length(start_extension,stop_extension) :
|
|
661 mylist = [gene,GFF[gene]['name'],'Met after stop', contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq]
|
|
662 writer.writerow(mylist)
|
|
663 file_context.write('>'+gene+'\n'+contexte_stop+'\n')
|
|
664 file_extension.write('>'+gene+'\n'+seq+'\n')
|
|
665 #print ">"+gene+"\n"+contexte_stop
|
|
666 else :
|
|
667 ## if its not a interesting case, we get stop context of genes without readthrough
|
|
668 if GFF[gene]['strand'] == '+' :
|
|
669 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['stop']-6:GFF[gene]['stop']+6])
|
|
670 file_back.write(contexte_stop+'\n')
|
|
671 else :
|
|
672 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['start']-7:GFF[gene]['start']+5].reverse_complement())
|
|
673 file_back.write(contexte_stop+'\n')
|
|
674
|
|
675 ## excluded UT with incorrect positions
|
|
676 except ValueError:
|
|
677 pass
|
|
678
|
|
679
|
|
680 file_context.close()
|
|
681 file_back.close()
|
|
682 file_extension.close()
|
|
683 except Exception,e:
|
|
684 stop_err( 'Error during computing analysis : ' + str( e ) )
|
|
685
|
|
686
|
|
687 def cleaning_bam(bam):
|
|
688 '''
|
|
689 Remove reads unmapped, non uniquely mapped and reads with length lower than 25 and upper than 32, and mapping quality upper than 12
|
|
690 '''
|
|
691 try :
|
|
692 header = subprocess.check_output(['samtools', 'view', '-H', bam], stderr= subprocess.PIPE)
|
|
693 #header = results.split('\n')
|
|
694
|
|
695 # check mapper for define multiple tag
|
|
696 if re.search('bowtie', header):
|
|
697 multi_tag = "XS:i:"
|
|
698 elif re.search('bwa', header):
|
|
699 multi_tag = "XT:A:M"
|
|
700 else :
|
|
701 stop_err("No PG tag find in"+bam+". Please use bowtie or bwa for mapping")
|
|
702
|
|
703 tmp_sam = tempfile.mktemp()
|
|
704 cmd = "samtools view %s > %s" % (bam, tmp_sam)
|
|
705 proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
|
|
706 returncode = proc.wait()
|
|
707
|
|
708
|
|
709 with open(tempfile.mktemp(),'w') as out :
|
|
710 out.write(header)
|
|
711 with open(tmp_sam,'r') as sam :
|
|
712 for line in sam :
|
|
713 if multi_tag not in line and line.split('\t')[1] != '4' and int(line.split('\t')[4]) > 12 :
|
|
714 if len(line.split('\t')[9]) < 32 and len(line.split('\t')[9]) > 25 :
|
|
715 out.write(line)
|
|
716 bamfile = tempfile.mktemp()+'.bam'
|
|
717 cmd = "samtools view -hSb %s > %s" % (out.name,bamfile)
|
|
718 proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
|
|
719 returncode = proc.wait()
|
|
720
|
|
721 return bamfile
|
|
722
|
|
723 except Exception,e:
|
|
724 stop_err( 'Error during cleaning bam : ' + str( e ) )
|
|
725
|
|
726 def write_html_page(html,subfolder) :
|
|
727
|
|
728
|
|
729 try :
|
|
730
|
|
731 gene_table = ''
|
|
732 gene_table += '<table>'
|
|
733 gene_table += '<thead><tr><th data-sort="string">Gene</th><th>Plot</th><th data-sort="string">Name</th><th>Stop context</th><th>Coordinates</th><th>RPKM CDS</th><th>RPKM extension</th><th data-sort="float">ratio</th><th>Extension</th></tr></thead><tbody>'
|
|
734
|
|
735 with open(os.path.join(subfolder,'readthrough_result.csv'), 'rb') as csvfile:
|
|
736 spamreader = csv.reader(csvfile, delimiter='\t')
|
|
737 ## skip the header
|
|
738 next(spamreader, None)
|
|
739 for row in spamreader:
|
|
740 if row[2] == '-' :
|
|
741 gene_table += '<tr><td>%s</td><td><a href="%s.png" data-lightbox="%s"><img src="%s_thumbnail.png" /></a></td><td><a title="%s">%s</a></td><td>%s</td><td>%s:%s-%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' %(row[0], row[0], row[0], row[0], row[11], row[1], row[3], row[4], row[5], row[6], row[8], row[9], row[10], row[12])
|
|
742
|
|
743 gene_table += '</tbody></table>'
|
|
744
|
|
745
|
|
746
|
|
747 html_str = """
|
|
748 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
|
749 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
750
|
|
751 <html xmlns="http://www.w3.org/1999/xhtml">
|
|
752 <head>
|
|
753 <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" />
|
|
754 <script src="http://code.jquery.com/jquery-1.10.2.min.js"></script>
|
|
755 <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
|
|
756 <script src="http://code.jquery.com/ui/1.10.3/jquery-ui.js"></script>
|
|
757 <script src="lightbox/js/jquery-1.10.2.min.js"></script>
|
|
758 <script src="lightbox/js/lightbox-2.6.min.js"></script>
|
|
759 <link href="lightbox/css/lightbox.css" rel="stylesheet" />
|
|
760 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
|
761 <title>Dual coding result file</title>
|
|
762 <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" />
|
|
763 <script>
|
|
764 (function(d){d.fn.stupidtable=function(b){return this.each(function(){var a=d(this);b=b||{};b=d.extend({},d.fn.stupidtable.default_sort_fns,b);var n=function(a,b){for(var f=[],c=0,e=a.slice(0).sort(b),h=0;h<a.length;h++){for(c=d.inArray(a[h],e);-1!=d.inArray(c,f);)c++;f.push(c)}return f},q=function(a,b){for(var d=a.slice(0),c=0,e=0;e<b.length;e++)c=b[e],d[c]=a[e];return d};a.on("click","th",function(){var m=a.children("tbody").children("tr"),g=d(this),f=0,c=d.fn.stupidtable.dir;a.find("th").slice(0, g.index()).each(function(){var a=d(this).attr("colspan")||1;f+=parseInt(a,10)});var e=g.data("sort-default")||c.ASC;g.data("sort-dir")&&(e=g.data("sort-dir")===c.ASC?c.DESC:c.ASC);var h=g.data("sort")||null;null!==h&&(a.trigger("beforetablesort",{column:f,direction:e}),a.css("display"),setTimeout(function(){var l=[],p=b[h];m.each(function(a,b){var c=d(b).children().eq(f),e=c.data("sort-value"),c="undefined"!==typeof e?e:c.text();l.push(c)});var k;k=e==c.ASC?n(l,p):n(l,function(a,b){return-p(a,b)}); a.find("th").data("sort-dir",null).removeClass("sorting-desc sorting-asc");g.data("sort-dir",e).addClass("sorting-"+e);k=d(q(m,k));a.children("tbody").remove();a.append("<tbody />").append(k);a.trigger("aftertablesort",{column:f,direction:e});a.css("display")},10))})})};d.fn.stupidtable.dir={ASC:"asc",DESC:"desc"};d.fn.stupidtable.default_sort_fns={"int":function(b,a){return parseInt(b,10)-parseInt(a,10)},"float":function(b,a){return parseFloat(b)-parseFloat(a)},string:function(b,a){return b<a?-1: b>a?1:0},"string-ins":function(b,a){b=b.toLowerCase();a=a.toLowerCase();return b<a?-1:b>a?1:0}}})(jQuery);
|
|
765 (function($) {
|
|
766
|
|
767 $.fn.stupidtable = function(sortFns) {
|
|
768 return this.each(function() {
|
|
769 var $table = $(this);
|
|
770 sortFns = sortFns || {};
|
|
771
|
|
772 // ==================================================== //
|
|
773 // Utility functions //
|
|
774 // ==================================================== //
|
|
775
|
|
776 // Merge sort functions with some default sort functions.
|
|
777 sortFns = $.extend({}, $.fn.stupidtable.default_sort_fns, sortFns);
|
|
778
|
|
779 // Return the resulting indexes of a sort so we can apply
|
|
780 // this result elsewhere. This returns an array of index numbers.
|
|
781 // return[0] = x means "arr's 0th element is now at x"
|
|
782 var sort_map = function(arr, sort_function) {
|
|
783 var map = [];
|
|
784 var index = 0;
|
|
785 var sorted = arr.slice(0).sort(sort_function);
|
|
786 for (var i=0; i<arr.length; i++) {
|
|
787 index = $.inArray(arr[i], sorted);
|
|
788
|
|
789 // If this index is already in the map, look for the next index.
|
|
790 // This handles the case of duplicate entries.
|
|
791 while ($.inArray(index, map) != -1) {
|
|
792 index++;
|
|
793 }
|
|
794 map.push(index);
|
|
795 }
|
|
796
|
|
797 return map;
|
|
798 };
|
|
799
|
|
800 // Apply a sort map to the array.
|
|
801 var apply_sort_map = function(arr, map) {
|
|
802 var clone = arr.slice(0),
|
|
803 newIndex = 0;
|
|
804 for (var i=0; i<map.length; i++) {
|
|
805 newIndex = map[i];
|
|
806 clone[newIndex] = arr[i];
|
|
807 }
|
|
808 return clone;
|
|
809 };
|
|
810
|
|
811 // ==================================================== //
|
|
812 // Begin execution! //
|
|
813 // ==================================================== //
|
|
814
|
|
815 // Do sorting when THs are clicked
|
|
816 $table.on("click", "th", function() {
|
|
817 var trs = $table.children("tbody").children("tr");
|
|
818 var $this = $(this);
|
|
819 var th_index = 0;
|
|
820 var dir = $.fn.stupidtable.dir;
|
|
821
|
|
822 $table.find("th").slice(0, $this.index()).each(function() {
|
|
823 var cols = $(this).attr("colspan") || 1;
|
|
824 th_index += parseInt(cols,10);
|
|
825 });
|
|
826
|
|
827 // Determine (and/or reverse) sorting direction, default `asc`
|
|
828 var sort_dir = $this.data("sort-default") || dir.ASC;
|
|
829 if ($this.data("sort-dir"))
|
|
830 sort_dir = $this.data("sort-dir") === dir.ASC ? dir.DESC : dir.ASC;
|
|
831
|
|
832 // Choose appropriate sorting function.
|
|
833 var type = $this.data("sort") || null;
|
|
834
|
|
835 // Prevent sorting if no type defined
|
|
836 if (type === null) {
|
|
837 return;
|
|
838 }
|
|
839
|
|
840 // Trigger `beforetablesort` event that calling scripts can hook into;
|
|
841 // pass parameters for sorted column index and sorting direction
|
|
842 $table.trigger("beforetablesort", {column: th_index, direction: sort_dir});
|
|
843 // More reliable method of forcing a redraw
|
|
844 $table.css("display");
|
|
845
|
|
846 // Run sorting asynchronously on a timout to force browser redraw after
|
|
847 // `beforetablesort` callback. Also avoids locking up the browser too much.
|
|
848 setTimeout(function() {
|
|
849 // Gather the elements for this column
|
|
850 var column = [];
|
|
851 var sortMethod = sortFns[type];
|
|
852
|
|
853 // Push either the value of the `data-order-by` attribute if specified
|
|
854 // or just the text() value in this column to column[] for comparison.
|
|
855 trs.each(function(index,tr) {
|
|
856 var $e = $(tr).children().eq(th_index);
|
|
857 var sort_val = $e.data("sort-value");
|
|
858 var order_by = typeof(sort_val) !== "undefined" ? sort_val : $e.text();
|
|
859 column.push(order_by);
|
|
860 });
|
|
861
|
|
862 // Create the sort map. This column having a sort-dir implies it was
|
|
863 // the last column sorted. As long as no data-sort-desc is specified,
|
|
864 // we're free to just reverse the column.
|
|
865 var theMap;
|
|
866 if (sort_dir == dir.ASC)
|
|
867 theMap = sort_map(column, sortMethod);
|
|
868 else
|
|
869 theMap = sort_map(column, function(a, b) { return -sortMethod(a, b); });
|
|
870
|
|
871 // Reset siblings
|
|
872 $table.find("th").data("sort-dir", null).removeClass("sorting-desc sorting-asc");
|
|
873 $this.data("sort-dir", sort_dir).addClass("sorting-"+sort_dir);
|
|
874
|
|
875 var sortedTRs = $(apply_sort_map(trs, theMap));
|
|
876 $table.children("tbody").remove();
|
|
877 $table.append("<tbody />").append(sortedTRs);
|
|
878
|
|
879 // Trigger `aftertablesort` event. Similar to `beforetablesort`
|
|
880 $table.trigger("aftertablesort", {column: th_index, direction: sort_dir});
|
|
881 // More reliable method of forcing a redraw
|
|
882 $table.css("display");
|
|
883 }, 10);
|
|
884 });
|
|
885 });
|
|
886 };
|
|
887
|
|
888 // Enum containing sorting directions
|
|
889 $.fn.stupidtable.dir = {ASC: "asc", DESC: "desc"};
|
|
890
|
|
891 $.fn.stupidtable.default_sort_fns = {
|
|
892 "int": function(a, b) {
|
|
893 return parseInt(a, 10) - parseInt(b, 10);
|
|
894 },
|
|
895 "float": function(a, b) {
|
|
896 return parseFloat(a) - parseFloat(b);
|
|
897 },
|
|
898 "string": function(a, b) {
|
|
899 if (a < b) return -1;
|
|
900 if (a > b) return +1;
|
|
901 return 0;
|
|
902 },
|
|
903 "string-ins": function(a, b) {
|
|
904 a = a.toLowerCase();
|
|
905 b = b.toLowerCase();
|
|
906 if (a < b) return -1;
|
|
907 if (a > b) return +1;
|
|
908 return 0;
|
|
909 }
|
|
910 };
|
|
911
|
|
912 })(jQuery);
|
|
913 $(function(){
|
|
914 var table = $("table").stupidtable();
|
|
915
|
|
916 table.on("beforetablesort", function (event, data) {
|
|
917 // data.column - the index of the column sorted after a click
|
|
918 // data.direction - the sorting direction (either asc or desc)
|
|
919 $("#msg").text("Sorting index " + data.column)
|
|
920 });
|
|
921 table.on("aftertablesort", function (event, data) {
|
|
922 var th = $(this).find("th");
|
|
923 th.find(".arrow").remove();
|
|
924 var dir = $.fn.stupidtable.dir;
|
|
925 var arrow = data.direction === dir.ASC ? "↑" : "↓";
|
|
926 th.eq(data.column).append('<span class="arrow">' + arrow +'</span>');
|
|
927 });
|
|
928 });
|
|
929 </script>
|
|
930 <style type="text/css">
|
|
931 label {
|
|
932 display: inline-block;
|
|
933 width: 5em;
|
|
934 }
|
|
935 table {
|
|
936 border-collapse: collapse;
|
|
937 }
|
|
938 th, td {
|
|
939 padding: 5px 10px;
|
|
940 border: 1px solid #999;
|
|
941 }
|
|
942 th {
|
|
943 background-color: #a7d3ff;
|
|
944 }
|
|
945 th[data-sort]{
|
|
946 cursor:pointer;
|
|
947 }
|
|
948 a[data-lightbox]{
|
|
949 cursor:zoom-in;
|
|
950 }
|
|
951 #msg {
|
|
952 color: green;
|
|
953 }
|
|
954 </style>
|
|
955 </head>
|
|
956 <body>
|
|
957 <h1>Readthrough analyse results</h1>
|
|
958 %s
|
|
959 </body>
|
|
960 </html> """ % (gene_table)
|
|
961
|
|
962 html_file = open(html,"w")
|
|
963 html_file.write(html_str)
|
|
964 html_file.close()
|
|
965
|
|
966
|
|
967 except Exception, e :
|
|
968 stop_err('Error during html page creation : ' + str( e ) )
|
|
969
|
|
970
|
|
971 def __main__():
|
|
972
|
|
973 ## python metagene_readthrough.py -g Saccer3.gff -f Saccer3.fa -b B1_sorted.bam -o Bstrain_readthrough.csv
|
|
974 #python /home/rlegendre/galaxy/galaxy-dist/tools/rib_profiling/metagene_readthrough.py -g /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.gff -f /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.fa -b /data/Mapping/read_mapped_unique_psiP_sorted.bam -o /home/rlegendre/Documents/Translecture/plop_py.csv -d /home/rlegendre/Documents/Translecture/out_trans/
|
|
975 #python /home/rlegendre/galaxy/galaxy-dist/tools/rib_profiling/metagene_readthrough.py -g /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.gff -f /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.fa -b /home/rlegendre/Documents/PDE2S/PDE2S_trim_no_rRNA_sorted.bam -o /home/rlegendre/Documents/PDE2S/Readthrough_pde2s.csv -d /home/rlegendre/Documents/PDE2S/out_trans
|
|
976
|
|
977
|
|
978 #Parse command line options
|
|
979 parser = optparse.OptionParser()
|
|
980 parser.add_option("-g", "--gff", dest="gff", type= "string",
|
|
981 help="GFF annotation file", metavar="FILE")
|
|
982
|
|
983 parser.add_option("-f", "--fasta", dest="fasta", type= "string",
|
|
984 help="Fasta file ", metavar="FILE")
|
|
985
|
|
986 parser.add_option("-b", "--bam", dest="bamfile", type= "string",
|
|
987 help="Bam Ribo-Seq alignments ", metavar="FILE")
|
|
988
|
|
989 parser.add_option("-d", "--dirout", dest="dirout", type= "string",
|
|
990 help="write report in this html file and in associated directory", metavar="STR,STR")
|
|
991
|
|
992 parser.add_option("-q", "--quiet",
|
|
993 action="store_false", dest="verbose", default=True,
|
|
994 help="don't print status messages to stdout")
|
|
995
|
|
996 (options, args) = parser.parse_args()
|
|
997 sys.stdout.write("Begin readthrough analysis at %s\n" % time.asctime( time.localtime(time.time())))
|
|
998
|
|
999 try:
|
|
1000 (html_file, subfolder ) = options.dirout.split(",")
|
|
1001 if os.path.exists(subfolder):
|
|
1002 raise
|
|
1003 try:
|
|
1004 os.mkdir(subfolder)
|
|
1005 except:
|
|
1006 raise
|
|
1007
|
|
1008 GFF = store_gff(options.gff)
|
|
1009 clean_file = cleaning_bam(options.bamfile)
|
|
1010 compute_analysis(clean_file, GFF, options.fasta, options.gff, subfolder)
|
|
1011 if os.path.exists( clean_file ):
|
|
1012 os.remove( clean_file )
|
|
1013
|
|
1014 write_html_page(html_file,subfolder)
|
|
1015 ##paste jquery script in result directory :
|
|
1016 jq_src = os.path.join(os.path.dirname(__file__),'lightbox')
|
|
1017 shutil.copytree(jq_src,os.path.join(subfolder,'lightbox'))
|
|
1018
|
|
1019
|
|
1020 sys.stdout.write("Finish readthrough analysis at %s\n" % time.asctime( time.localtime(time.time())))
|
|
1021 except Exception, e:
|
|
1022 stop_err( 'Error running metagene readthrough analysis : ' + str( e ) )
|
|
1023
|
|
1024
|
|
1025 if __name__=="__main__":
|
|
1026 __main__()
|