Mercurial > repos > rlegendre > ribo_tools
comparison metagene_readthrough.py @ 6:29c9c86e17e1
Uploaded
author | rlegendre |
---|---|
date | Mon, 20 Oct 2014 11:07:40 -0400 |
parents | |
children | 707807fee542 |
comparison
equal
deleted
inserted
replaced
5:0b8fa8b7f356 | 6:29c9c86e17e1 |
---|---|
1 #!/usr/bin/env python2.7.3 | |
2 #-*- coding: utf-8 -*- | |
3 | |
4 ''' | |
5 Created on Dec. 2013 | |
6 @author: rachel legendre | |
7 @copyright: rachel.legendre@igmors.u-psud.fr | |
8 @license: GPL v3 | |
9 ''' | |
10 | |
11 import os, sys, time, optparse, shutil, re, urllib, subprocess, tempfile | |
12 from urllib import unquote | |
13 from Bio import SeqIO | |
14 import csv | |
15 import pysam | |
16 import HTSeq | |
17 #from matplotlib import pyplot as pl | |
18 import matplotlib | |
19 matplotlib.use('Agg') | |
20 import matplotlib.pyplot as pl | |
21 from numpy import arange | |
22 from matplotlib import ticker as t | |
23 from PIL import Image | |
24 | |
25 def stop_err( msg ): | |
26 sys.stderr.write( "%s\n" % msg ) | |
27 sys.stderr.write( "Programme aborted at %s\n" % time.asctime(time.localtime(time.time()))) | |
28 sys.exit() | |
29 | |
30 | |
31 def store_gff(gff): | |
32 ''' | |
33 parse and store gff file in a dictionnary | |
34 ''' | |
35 try: | |
36 GFF = {} | |
37 with open(gff, 'r') as f_gff : | |
38 GFF['order'] = [] | |
39 for line in f_gff: | |
40 ## switch commented lines | |
41 line = line.split("#")[0] | |
42 if line != "" : | |
43 feature = (line.split('\t')[8]).split(';') | |
44 # first line is already gene line : | |
45 if line.split('\t')[2] == 'gene' : | |
46 gene = feature[0].replace("ID=","") | |
47 if re.search('gene',feature[2]) : | |
48 Name = feature[2].replace("gene=","") | |
49 else : | |
50 Name = "Unknown" | |
51 ##get annotation | |
52 note = re.sub(r".+\;Note\=(.+)\;display\=.+", r"\1", line) | |
53 note = unquote(str(note)).replace("\n","") | |
54 ## store gene information | |
55 GFF['order'].append(gene) | |
56 GFF[gene] = {} | |
57 GFF[gene]['chrom'] = line.split('\t')[0] | |
58 GFF[gene]['start'] = int(line.split('\t')[3]) | |
59 GFF[gene]['stop'] = int(line.split('\t')[4]) | |
60 GFF[gene]['strand'] = line.split('\t')[6] | |
61 GFF[gene]['name'] = Name | |
62 GFF[gene]['note'] = note | |
63 GFF[gene]['exon'] = {} | |
64 GFF[gene]['exon_number'] = 0 | |
65 #print Name | |
66 elif line.split('\t')[2] == 'CDS' : | |
67 gene = re.sub(r".?Parent\=(.+)(_mRNA)+", r"\1", feature[0]) | |
68 if GFF.has_key(gene) : | |
69 GFF[gene]['exon_number'] += 1 | |
70 exon_number = GFF[gene]['exon_number'] | |
71 GFF[gene]['exon'][exon_number] = {} | |
72 GFF[gene]['exon'][exon_number]['frame'] = line.split('\t')[7] | |
73 GFF[gene]['exon'][exon_number]['start'] = int(line.split('\t')[3]) | |
74 GFF[gene]['exon'][exon_number]['stop'] = int(line.split('\t')[4]) | |
75 | |
76 ## if there is a five prim UTR intron, we change start of gene | |
77 elif line.split('\t')[2] == 'five_prime_UTR_intron' : | |
78 if GFF[gene]['strand'] == "+" : | |
79 GFF[gene]['start'] = GFF[gene]['exon'][1]['start'] | |
80 else : | |
81 GFF[gene]['stop'] = GFF[gene]['exon'][exon_number]['stop'] | |
82 return GFF | |
83 except Exception,e: | |
84 stop_err( 'Error during gff storage : ' + str( e ) ) | |
85 | |
86 | |
87 | |
88 | |
89 def compute_rpkm(length,count_gene,count_tot) : | |
90 | |
91 try : | |
92 rpkm = "{0:.4f}".format(count_gene*1000000.0/(count_tot*length)) | |
93 except ArithmeticError : | |
94 stop_err( 'Illegal division by zero') | |
95 return float(rpkm) | |
96 | |
97 | |
98 def find_stop(seq) : | |
99 ''' | |
100 Find stop codon in a sequence and return position of first nucleotide in stop | |
101 ''' | |
102 stop_codon = ['TAA','TAG','TGA'] | |
103 for nt in range(0,len(seq)-3,3) : | |
104 codon = seq[nt:nt+3] | |
105 if codon in stop_codon : | |
106 return nt | |
107 | |
108 return -1 | |
109 | |
110 | |
111 def check_met(seq): | |
112 ''' | |
113 Boolean function for testing presence or absence of methionine in 5 codons following stop codon | |
114 ''' | |
115 met = 'ATG' | |
116 for pos in range(0,15,3) : | |
117 codon = seq[pos:pos+3] | |
118 if codon in met : | |
119 return True | |
120 | |
121 return False | |
122 | |
123 | |
124 ''' | |
125 feature.iv is a GenomicInterval object : | |
126 A GenomicInterval object has the following slots, some of which | |
127 are calculated from the other: | |
128 | |
129 chrom: The name of a sequence (i.e., chromosome, contig, or | |
130 the like). | |
131 start: The start of the interval. Even on the reverse strand, | |
132 this is always the smaller of the two values 'start' and 'end'. | |
133 Note that all positions should be given as 0-based value! | |
134 end: The end of the interval. Following Python convention for | |
135 ranges, this in one more than the coordinate of the last base | |
136 that is considered part of the sequence. | |
137 strand: The strand, as a single character, '+' or '-'. '.' indicates | |
138 that the strand is irrelavant. (Alternatively, pass a Strand object.) | |
139 length: The length of the interval, i.e., end - start | |
140 start_d: The "directional start" position. This is the position of the | |
141 first base of the interval, taking the strand into account. Hence, | |
142 this is the same as 'start' except when strand == '-', in which | |
143 case it is end-1. | |
144 end_d: The "directional end": Usually, the same as 'end', but for | |
145 strand=='-1', it is start-1. | |
146 | |
147 ''' | |
148 def check_overlapping(gff_reader,chrm,start,stop,strand): | |
149 | |
150 #### probleme avec les genes completement inclu... | |
151 | |
152 iv2 = HTSeq.GenomicInterval(chrm,start,stop,strand) | |
153 for feature in gff_reader: | |
154 if feature.type == "gene" : | |
155 if feature.iv.overlaps(iv2) : | |
156 ## if its a reverse gene, we replace start of extension by start of previous gene | |
157 if strand == '-' : | |
158 return (feature.iv.end+3,stop) | |
159 ## else we replace stop of extension by start of following gene | |
160 else : | |
161 return (start,feature.iv.start-3) | |
162 ## if no overlap are find, we return -1 | |
163 return (start,stop) | |
164 | |
165 | |
166 def pass_length(start,stop) : | |
167 | |
168 if (stop-start) > 25 : | |
169 return True | |
170 else : | |
171 return False | |
172 | |
173 | |
174 def check_homo_coverage(gene,GFF,start,stop, aln) : | |
175 | |
176 chrom = GFF[gene]['chrom'] | |
177 ## compute nunber of nucleotides in CDS with a coverage equal to zero | |
178 nt_cds_cov = 0 | |
179 nt_cds_num = 0 | |
180 for i in range(1,GFF[gene]['exon_number']+1) : | |
181 for z in range(GFF[gene]['exon'][i]['start'],GFF[gene]['exon'][i]['stop']): | |
182 nt_cds_num += 1 | |
183 if aln.count(chrom,z,z+1) == 0 : | |
184 nt_cds_cov += 1 | |
185 | |
186 ## compute percent of CDS no covering | |
187 percent = nt_cds_cov*100/nt_cds_num | |
188 | |
189 ## compute number of nucleotides with no coverage in extension | |
190 nt_no_cover = 0 | |
191 for pos in range(start,stop,1) : | |
192 if aln.count(chrom,pos,pos+1) == 0 : | |
193 nt_no_cover += 1 | |
194 #print gene, nt_cds_cov, percent, nt_no_cover | |
195 #percent10 = (stop-start)*50/100 | |
196 if (nt_no_cover*100)/(stop-start) > percent : | |
197 return False | |
198 else : | |
199 return True | |
200 | |
201 def plot_gene ( aln, gene, start_extension, stop_extension, dirout ) : | |
202 | |
203 try: | |
204 strand = gene['strand'] | |
205 len_gene = gene['stop']-gene['start'] | |
206 if strand is "-" : | |
207 len_ext = gene['stop']-start_extension | |
208 ## coverage in all gene + extension | |
209 start = start_extension-100 | |
210 stop = gene['stop']+100 | |
211 vector1 = [0]*(stop-start) | |
212 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop): | |
213 # vector.append(pileupcolumn.n) | |
214 for read in aln.fetch(gene['chrom'], start, stop): | |
215 if read.is_reverse : | |
216 ## for get footprint in P-site (estimate) we take 3 nt in middle of read | |
217 #pos = (read.pos+13)-start | |
218 pos = (read.pos-start) + (read.rlen/2)-1 | |
219 for z in range(pos,(pos+3)) : | |
220 ## le fetch contient des reads qui chevauchent les 30 nt de la fin du gene mais dont le site P | |
221 ## se trouve avant notre vector, le z devient negatif et la couverture augmente en fin de vecteur (ce qui est faux) | |
222 if z > 0 : | |
223 try : | |
224 vector1[z] += 1 | |
225 except IndexError : | |
226 pass | |
227 vector1.reverse() | |
228 mean_read = float(sum(vector1))/float(len(vector1)) | |
229 cov = [(x/mean_read) for x in vector1] | |
230 idx_tot = arange(len(cov)) | |
231 ## coverage in extension | |
232 start_ext = start_extension-40 | |
233 stop_ext = stop_extension+30 | |
234 vector2 = [0]*(stop_ext-start_ext) | |
235 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop): | |
236 # vector.append(pileupcolumn.n) | |
237 for read in aln.fetch(gene['chrom'], start_extension, stop_ext): | |
238 if read.is_reverse : | |
239 ## get footprint in P-site | |
240 #pos = (read.pos+13)-start_ext | |
241 pos = (read.pos-start_ext) + (read.rlen/2)-1 | |
242 for z in range(pos,(pos+3)) : | |
243 if z > 0 : | |
244 try : | |
245 vector2[z] += 1 | |
246 except IndexError : | |
247 pass | |
248 vector2.reverse() | |
249 #mean_read = float(sum(vector))/float(len(vector)) | |
250 cov_ext = [(x/mean_read) for x in vector2] | |
251 _max = max(cov_ext[30::]) | |
252 idx_ext = arange(len(cov_ext)) | |
253 | |
254 else : | |
255 len_ext = stop_extension-gene['start'] | |
256 start = gene['start']-100 | |
257 stop = stop_extension+100 | |
258 vector = [0]*(stop-start) | |
259 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop): | |
260 #vector.append(pileupcolumn.n) | |
261 for read in aln.fetch(gene['chrom'], start, stop): | |
262 if not read.is_reverse : | |
263 ## get footprint in P-site | |
264 #pos = (read.pos+12)-start | |
265 pos = (read.pos-start) + (read.rlen/2)-1 | |
266 for z in range(pos,(pos+3)) : | |
267 if z > 0 : | |
268 try : | |
269 vector[z] += 1 | |
270 except IndexError : | |
271 pass | |
272 mean_read = float(sum(vector))/float(len(vector)) | |
273 cov = [(x/mean_read) for x in vector] | |
274 idx_tot = arange(len(cov)) | |
275 | |
276 ## coverage in extension | |
277 start_ext = gene['stop']-30 | |
278 stop_ext = stop_extension+40 | |
279 vector = [0]*(stop-start_ext) | |
280 for read in aln.fetch(gene['chrom'], start_ext, stop_extension): | |
281 if not read.is_reverse : | |
282 ## get footprint in P-site | |
283 pos = (read.pos-start_ext) + (read.rlen/2)-1 | |
284 for z in range(pos,(pos+3)) : | |
285 if z > 0 : | |
286 try : | |
287 vector[z] += 1 | |
288 except IndexError : | |
289 pass | |
290 | |
291 cov_ext = [(x/mean_read) for x in vector] | |
292 idx_ext = arange(len(cov_ext)) | |
293 _max = max(cov_ext[30::]) | |
294 | |
295 #### PLOT FIGURE #### | |
296 | |
297 font = {'family' : 'serif','color': 'grey','weight' : 'normal','size' : 16 } | |
298 | |
299 | |
300 fig = pl.figure(num=1) | |
301 ## create a big subplot for common y axis on two last subplot | |
302 ax = fig.add_subplot(2,1,2) | |
303 ## hide all spines | |
304 ax.spines['top'].set_visible(False) | |
305 ax.spines['bottom'].set_visible(False) | |
306 ax.spines['left'].set_visible(False) | |
307 ax.spines['right'].set_visible(False) | |
308 ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off') | |
309 | |
310 ## plot gene structure | |
311 ax1 = fig.add_subplot(3,1,1) | |
312 ax1.set_title(gene['name']) | |
313 ## hide all spines | |
314 ax1.spines['right'].set_color('none') | |
315 ax1.spines['top'].set_color('none') | |
316 ax1.spines['left'].set_color('none') | |
317 ax1.spines['bottom'].set_color('none') | |
318 ax1.set_ylim(0,5) | |
319 #set xlim as second plot with 100nt switch | |
320 ax1.set_xlim(-100,len(idx_tot)-100) | |
321 ## hide ticks | |
322 pl.yticks([]) | |
323 pl.xticks([]) | |
324 ## if it's a "simple" gene | |
325 if gene['exon_number'] == 1 : | |
326 exon = arange(len_gene) | |
327 x = [3]*len_gene | |
328 ax1.plot(exon,x,color='#9B9E9C') | |
329 ax1.fill_between(exon,2,x,color='#9B9E9C') | |
330 ## if it's a gene with introns | |
331 else : | |
332 ## plot a line representing intron | |
333 intron = arange(len_gene) | |
334 y = [2.5]*len_gene | |
335 ax1.plot(intron,y,color='#9B9E9C') | |
336 | |
337 ## plotting each intron | |
338 start = gene['start'] | |
339 if strand == '+' : | |
340 for exon in range(1,gene['exon_number']+1,1) : | |
341 len_exon = gene['exon'][exon]['stop']-gene['exon'][exon]['start'] | |
342 idx = gene['exon'][exon]['start'] - start | |
343 exo = arange(idx,idx+len_exon) | |
344 x = [3]*len_exon | |
345 ax1.plot(exo,x,color='#9B9E9C') | |
346 ax1.fill_between(exo,2,x,color='#9B9E9C') | |
347 else : | |
348 ## if it's a reverse gene we must reverse exon's position | |
349 start = gene['start'] | |
350 tabF = [2.5]*len_gene | |
351 tabR = [2.5]*len_gene | |
352 for exon in range(1,gene['exon_number']+1,1) : | |
353 len_exon = gene['exon'][exon]['stop']-gene['exon'][exon]['start'] | |
354 idx = gene['exon'][exon]['start'] - start | |
355 exo = arange(idx,idx+len_exon) | |
356 for z in exo : | |
357 tabF[z] = 3 | |
358 tabR[z] = 2 | |
359 tabF.reverse() | |
360 tabR.reverse() | |
361 #pl.ylim(0,5) | |
362 ax1.plot(tabF,color='#9B9E9C') | |
363 ax1.plot(tabR,color='#9B9E9C') | |
364 x = arange(len(tabR)) | |
365 ax1.fill_between(x,tabF,tabR,color='#9B9E9C') | |
366 | |
367 ## insert arrows (as genome browser representation) | |
368 yloc = 2.5 | |
369 narrows = 20 | |
370 exonwidth = .8 | |
371 spread = .4 * len_gene / narrows | |
372 for i in range(narrows): | |
373 loc = (float(i) * len_gene / narrows)+ (len(idx_tot)/100)*2 | |
374 if strand == '+' : | |
375 x = [loc - spread, loc, loc - spread] | |
376 else: | |
377 x = [loc - spread, loc, loc - spread] | |
378 y = [yloc - exonwidth / 5, yloc, yloc + exonwidth / 5] | |
379 ax1.plot(x, y, lw=1.4, color='#676B69') | |
380 | |
381 | |
382 # plot coverage in all gene + extension | |
383 ax2 = fig.add_subplot(3,1,2) | |
384 ## fixe limit to length of coverage for x axis | |
385 ax2.set_xlim(0,len(idx_tot)) | |
386 ## fixe 4 ticks and associated labels for y axis | |
387 ax2.set_yticklabels(arange(0,int(max(cov)+20),int((max(cov)+20)/4))) | |
388 ax2.set_yticks(arange(0,int(max(cov)+20),int((max(cov)+20)/4))) | |
389 ### add start and stop of gene in axe in place of number | |
390 ax2.set_xticks([100,(100+len_gene),(100+len_ext)]) | |
391 ax2.set_xticklabels(["{:,}".format(gene['start']),"{:,}".format(gene['stop']),""]) | |
392 ## hide spines and any axis | |
393 ax2.spines['right'].set_visible(False) | |
394 ax2.spines['top'].set_visible(False) | |
395 ax2.yaxis.set_ticks_position('left') | |
396 ax2.xaxis.set_ticks_position('bottom') | |
397 ## plot and fill | |
398 ax2.plot(idx_tot, cov, color="#CC0011") | |
399 ax2.fill_between(idx_tot, 0,cov,color="#CC0011") | |
400 ax2.set_title("Genomic coordinates (%s,%s)"% (gene['strand'],gene['chrom']) ,fontdict={'fontsize':'small'}) | |
401 | |
402 # plot zoom coverage in extension | |
403 ax3 = fig.add_subplot(3,1,3) | |
404 ## fixe limit to length of coverage for x axis | |
405 ax3.set_ylim(0,int(_max)) | |
406 # we hide spines | |
407 ax3.spines['right'].set_visible(False) | |
408 ax3.spines['top'].set_visible(False) | |
409 ax3.yaxis.set_ticks_position('left') | |
410 ax3.xaxis.set_ticks_position('bottom') | |
411 ## add stop and in-frame stop in x axis | |
412 pl.xticks([30,( stop_extension-start_extension)+30],["stop codon","next in-frame stop"]) | |
413 ## fixe good position for labels | |
414 (ax3.get_xticklabels()[0]).set_horizontalalignment('center') | |
415 (ax3.get_xticklabels()[1]).set_horizontalalignment('left') | |
416 #if max coverage is lower than 2, we have a illegal division by zero | |
417 if _max > 2 : | |
418 ax3.set_yticklabels(arange(0,int(_max+1),int((_max+1)/3))) | |
419 ax3.set_yticks(arange(0,int(_max+1),int((_max+1)/3))) | |
420 else : | |
421 ## | |
422 ax3.set_ylim(0,_max) | |
423 ax3.ticklabel_format(style='sci', scilimits=(-2,2), axis='y',useOffset=False) | |
424 | |
425 ax3.plot(idx_ext, cov_ext, color="#CC0011") | |
426 ax3.fill_between(idx_ext, 0,cov_ext,color="#CC0011") | |
427 | |
428 | |
429 ## get scale of subplot 3 | |
430 #ax3.text(ax3.get_xlim()[1]-50,ax3.get_ylim()[1]-1, r'50 nt', fontdict=font) | |
431 #pl.arrow( ax3.get_xlim()[1]-50, ax3.get_ylim()[1]-2, ax3.get_xlim()[1]-50, 0, fc="grey", ec="grey",lw=2) | |
432 | |
433 ## set common y label | |
434 ax.set_ylabel('Normalized footprint counts',labelpad=20) | |
435 | |
436 ## draw and save plot | |
437 pl.draw() | |
438 #pl.show() | |
439 pl.savefig(dirout+".png",format='png', dpi=300) | |
440 pl.clf() | |
441 | |
442 | |
443 ## Make thumbnail for html page | |
444 infile = dirout+'.png' | |
445 size = 128, 128 | |
446 im = Image.open(infile) | |
447 im.thumbnail(size, Image.ANTIALIAS) | |
448 im.save(dirout+"_thumbnail.png", "PNG") | |
449 | |
450 | |
451 except Exception, e: | |
452 stop_err( 'Error during gene plotting : ' + str( e ) ) | |
453 | |
454 | |
455 def compute_analysis(bam, GFF, fasta, gff, dirout) : | |
456 | |
457 try: | |
458 background_file = dirout+"/background_sequence.fasta" | |
459 file_back = open(background_file, 'w') | |
460 file_context = open(dirout+"/stop_context.fasta", 'w') | |
461 file_extension = open(dirout+"/extensions.fasta", 'w') | |
462 ## Opening Bam file with pysam librarie | |
463 pysam.index(bam) | |
464 aln = pysam.Samfile(bam, "rb",header=True, check_header = True) | |
465 ## Opening fasta file in a dict with BioPython | |
466 SeqDict = SeqIO.to_dict(SeqIO.parse(open(fasta,"r"),"fasta")) | |
467 | |
468 ## count total read in bam file | |
469 cmd = "samtools view -c %s " % (bam) | |
470 proc = subprocess.Popen( args=cmd, shell=True, stdout = subprocess.PIPE) | |
471 count_tot = int(proc.stdout.read().rstrip()) | |
472 returncode = proc.wait() | |
473 | |
474 ## opening a GFF reader for check overlapping | |
475 gff_reader = HTSeq.GFF_Reader(gff) | |
476 | |
477 with open(dirout+"/readthrough_result.csv","w") as out : | |
478 myheader = ['Gene','Name', 'FAIL', 'Stop context','chrom','start extension','stop extension','length extension','RPKM CDS', 'RPKM extension','ratio','Annotation','sequence'] | |
479 writer = csv.writer(out,delimiter='\t') | |
480 writer.writerow(myheader) | |
481 for gene in GFF['order'] : | |
482 indic = "" | |
483 # compute rpkm of CDS : | |
484 len_cds = GFF[gene]['stop']-GFF[gene]['start'] | |
485 count_cds = 0 | |
486 ### count method of pysam doesn't strand information | |
487 if GFF[gene]['strand'] == '+' : | |
488 for track in aln.fetch(GFF[gene]['chrom'],GFF[gene]['start']+12,GFF[gene]['stop']-15) : | |
489 if not track.is_reverse : | |
490 count_cds += 1 | |
491 else : | |
492 for track in aln.fetch(GFF[gene]['chrom'],GFF[gene]['start']+15,GFF[gene]['stop']-12) : | |
493 if track.is_reverse : | |
494 count_cds += 1 | |
495 rpkm_cds = compute_rpkm(len_cds,count_cds,count_tot) | |
496 ## Only if gene is translate : | |
497 if rpkm_cds > 0 and count_cds > 128: | |
498 ## search footprint in UTR3 | |
499 count = 0 | |
500 try : | |
501 if GFF[gene]['strand'] == '+' : | |
502 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['stop']-6:GFF[gene]['stop']+6]) | |
503 #print gene, contexte_stop | |
504 start_extension = GFF[gene]['stop'] | |
505 stop_extension = GFF[gene]['stop']+90 | |
506 for track in aln.fetch(GFF[gene]['chrom'],start_extension+15,stop_extension) : | |
507 if not track.is_reverse : | |
508 count += 1 | |
509 | |
510 ## get sequence of extension | |
511 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension]) | |
512 | |
513 else : | |
514 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['start']-7:GFF[gene]['start']+5].reverse_complement()) | |
515 #print gene, contexte_stop | |
516 start_extension = GFF[gene]['start']-90 | |
517 stop_extension = GFF[gene]['start'] | |
518 for track in aln.fetch(GFF[gene]['chrom'],start_extension,stop_extension-15) : | |
519 if track.is_reverse : | |
520 count += 1 | |
521 ## get sequence of extension | |
522 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension-1].reverse_complement()) | |
523 | |
524 ## if we have coverage after cds stop codon | |
525 if count > 10 : | |
526 res = find_stop(seq) | |
527 if res == -1 : | |
528 ''' | |
529 Write results with no stop but RPF in extension | |
530 ''' | |
531 ## check if next in-frame codon is far than 90 nt extension : | |
532 if GFF[gene]['strand'] == '+' : | |
533 pos = check_overlapping(gff_reader,GFF[gene]['chrom'],GFF[gene]['stop']+1,GFF[gene]['stop']+300,'+') | |
534 start_extension = pos[0]-1 | |
535 stop_extension = pos[1] | |
536 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension]) | |
537 | |
538 #print gene,count,pos,'\n',seq | |
539 | |
540 if (seq): | |
541 res = find_stop(seq) | |
542 if res == -1 : | |
543 mylist = [gene,GFF[gene]['name'],'no stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq] | |
544 writer.writerow(mylist) | |
545 else : | |
546 indic = 'ok' | |
547 #print res | |
548 #stop_extension = start_extension + res +3 | |
549 else : | |
550 pos = check_overlapping(gff_reader,GFF[gene]['chrom'],GFF[gene]['start']-300,GFF[gene]['start']-1,'-') | |
551 start_extension = pos[0] | |
552 stop_extension = pos[1]+1 | |
553 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension-1].reverse_complement()) | |
554 | |
555 #print gene,count,pos,'\n',seq | |
556 | |
557 if (seq): | |
558 res = find_stop(seq) | |
559 if res == -1 : | |
560 mylist = [gene,GFF[gene]['name'],'no stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq] | |
561 writer.writerow(mylist) | |
562 else : | |
563 indic = 'ok' | |
564 #print res | |
565 #start_extension = stop_extension - res -3 | |
566 else : | |
567 indic = 'ok' | |
568 | |
569 | |
570 if indic == 'ok' : | |
571 ## We save new coordinates | |
572 if GFF[gene]['strand'] == '+' : | |
573 stop_extension = start_extension + res +3 | |
574 #print gene, count | |
575 #print gene,start_extension,stop_extension | |
576 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension]) | |
577 #print seq | |
578 count_stop = aln.count(GFF[gene]['chrom'],stop_extension-2,stop_extension+2) | |
579 if pass_length(start_extension,stop_extension) : | |
580 count_ext = aln.count(GFF[gene]['chrom'],start_extension+9,stop_extension-15) | |
581 if stop_extension > GFF[gene]['stop']+9 : | |
582 stop_ok = 1 | |
583 else : | |
584 stop_ok = 0 | |
585 else : | |
586 start_extension = stop_extension - res - 3 | |
587 #print gene, count | |
588 #print gene,start_extension,stop_extension | |
589 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension-1:stop_extension-1].reverse_complement()) | |
590 #print seq | |
591 count_stop = aln.count(GFF[gene]['chrom'],start_extension-2,start_extension+2) | |
592 if pass_length(start_extension,stop_extension) : | |
593 count_ext = aln.count(GFF[gene]['chrom'],start_extension+15,stop_extension-9) | |
594 if start_extension < GFF[gene]['start']-9 : | |
595 stop_ok = 1 | |
596 else : | |
597 stop_ok = 0 | |
598 ## if we are no methionine in 5 codons following stop of CDS | |
599 if (not check_met(seq) ): | |
600 ## if we have footprint in stop codon extension and stop extension is further than cds_stop+9 | |
601 if count_stop > 2 and stop_ok == 1 : | |
602 homo_cov = check_homo_coverage(gene,GFF,start_extension,stop_extension,aln) | |
603 if (homo_cov) : | |
604 ''' | |
605 write result witch corresponding to readthrough | |
606 ''' | |
607 ##if length of extension upper than 25 we can compute rpkm | |
608 if (not pass_length(start_extension,stop_extension)) : | |
609 len_ext = stop_extension-start_extension | |
610 rpkm_ext = 'nan' | |
611 ratio = 'nan' | |
612 else : | |
613 len_ext = stop_extension-start_extension | |
614 rpkm_ext = compute_rpkm(len_ext,count_ext,count_tot) | |
615 ## compute ratio between extension coverage and cds coverage (rpkm) | |
616 ratio = rpkm_ext/rpkm_cds | |
617 #print gene,ratio | |
618 #print start_extension,stop_extension | |
619 mylist = [gene,GFF[gene]['name'],'-',contexte_stop,GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,rpkm_ext,ratio,GFF[gene]['note'],seq] | |
620 writer.writerow(mylist) | |
621 file_context.write('>'+gene+'\n'+contexte_stop+'\n') | |
622 file_extension.write('>'+gene+'\n'+seq+'\n') | |
623 else : | |
624 ''' | |
625 write result witch corresponding to readthrough but with no homogeneous coverage | |
626 ''' | |
627 if (not pass_length(start_extension,stop_extension)) : | |
628 len_ext = stop_extension-start_extension | |
629 rpkm_ext = 'nan' | |
630 ratio = 'nan' | |
631 else : | |
632 len_ext = stop_extension-start_extension | |
633 rpkm_ext = compute_rpkm(len_ext,count_ext,count_tot) | |
634 ## compute ratio between extension coverage and cds coverage (rpkm) | |
635 ratio = rpkm_ext/rpkm_cds | |
636 mylist = [gene,GFF[gene]['name'],'hetero cov',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,rpkm_ext,ratio,GFF[gene]['note'],seq] | |
637 writer.writerow(mylist) | |
638 file_context.write('>'+gene+'\n'+contexte_stop+'\n') | |
639 file_extension.write('>'+gene+'\n'+seq+'\n') | |
640 #print ">"+gene+"\n"+contexte_stop | |
641 | |
642 ## plot gene : | |
643 plot_gene(aln, GFF[gene], start_extension, stop_extension, dirout+"/"+gene) | |
644 | |
645 | |
646 | |
647 else : | |
648 ''' | |
649 write result with no footprint in stop codon of extension | |
650 ''' | |
651 mylist = [gene,GFF[gene]['name'],'no RPF in stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq] | |
652 writer.writerow(mylist) | |
653 file_context.write('>'+gene+'\n'+contexte_stop+'\n') | |
654 file_extension.write('>'+gene+'\n'+seq+'\n') | |
655 #print ">"+gene+"\n"+contexte_stop | |
656 else : | |
657 ''' | |
658 write result with RPF maybe result of reinitiation on a start codon | |
659 ''' | |
660 if pass_length(start_extension,stop_extension) : | |
661 mylist = [gene,GFF[gene]['name'],'Met after stop', contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq] | |
662 writer.writerow(mylist) | |
663 file_context.write('>'+gene+'\n'+contexte_stop+'\n') | |
664 file_extension.write('>'+gene+'\n'+seq+'\n') | |
665 #print ">"+gene+"\n"+contexte_stop | |
666 else : | |
667 ## if its not a interesting case, we get stop context of genes without readthrough | |
668 if GFF[gene]['strand'] == '+' : | |
669 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['stop']-6:GFF[gene]['stop']+6]) | |
670 file_back.write(contexte_stop+'\n') | |
671 else : | |
672 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['start']-7:GFF[gene]['start']+5].reverse_complement()) | |
673 file_back.write(contexte_stop+'\n') | |
674 | |
675 ## excluded UT with incorrect positions | |
676 except ValueError: | |
677 pass | |
678 | |
679 | |
680 file_context.close() | |
681 file_back.close() | |
682 file_extension.close() | |
683 except Exception,e: | |
684 stop_err( 'Error during computing analysis : ' + str( e ) ) | |
685 | |
686 | |
687 def cleaning_bam(bam): | |
688 ''' | |
689 Remove reads unmapped, non uniquely mapped and reads with length lower than 25 and upper than 32, and mapping quality upper than 12 | |
690 ''' | |
691 try : | |
692 header = subprocess.check_output(['samtools', 'view', '-H', bam], stderr= subprocess.PIPE) | |
693 #header = results.split('\n') | |
694 | |
695 # check mapper for define multiple tag | |
696 if re.search('bowtie', header): | |
697 multi_tag = "XS:i:" | |
698 elif re.search('bwa', header): | |
699 multi_tag = "XT:A:M" | |
700 else : | |
701 stop_err("No PG tag find in"+bam+". Please use bowtie or bwa for mapping") | |
702 | |
703 tmp_sam = tempfile.mktemp() | |
704 cmd = "samtools view %s > %s" % (bam, tmp_sam) | |
705 proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE) | |
706 returncode = proc.wait() | |
707 | |
708 | |
709 with open(tempfile.mktemp(),'w') as out : | |
710 out.write(header) | |
711 with open(tmp_sam,'r') as sam : | |
712 for line in sam : | |
713 if multi_tag not in line and line.split('\t')[1] != '4' and int(line.split('\t')[4]) > 12 : | |
714 if len(line.split('\t')[9]) < 32 and len(line.split('\t')[9]) > 25 : | |
715 out.write(line) | |
716 bamfile = tempfile.mktemp()+'.bam' | |
717 cmd = "samtools view -hSb %s > %s" % (out.name,bamfile) | |
718 proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE) | |
719 returncode = proc.wait() | |
720 | |
721 return bamfile | |
722 | |
723 except Exception,e: | |
724 stop_err( 'Error during cleaning bam : ' + str( e ) ) | |
725 | |
726 def write_html_page(html,subfolder) : | |
727 | |
728 | |
729 try : | |
730 | |
731 gene_table = '' | |
732 gene_table += '<table>' | |
733 gene_table += '<thead><tr><th data-sort="string">Gene</th><th>Plot</th><th data-sort="string">Name</th><th>Stop context</th><th>Coordinates</th><th>RPKM CDS</th><th>RPKM extension</th><th data-sort="float">ratio</th><th>Extension</th></tr></thead><tbody>' | |
734 | |
735 with open(os.path.join(subfolder,'readthrough_result.csv'), 'rb') as csvfile: | |
736 spamreader = csv.reader(csvfile, delimiter='\t') | |
737 ## skip the header | |
738 next(spamreader, None) | |
739 for row in spamreader: | |
740 if row[2] == '-' : | |
741 gene_table += '<tr><td>%s</td><td><a href="%s.png" data-lightbox="%s"><img src="%s_thumbnail.png" /></a></td><td><a title="%s">%s</a></td><td>%s</td><td>%s:%s-%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' %(row[0], row[0], row[0], row[0], row[11], row[1], row[3], row[4], row[5], row[6], row[8], row[9], row[10], row[12]) | |
742 | |
743 gene_table += '</tbody></table>' | |
744 | |
745 | |
746 | |
747 html_str = """ | |
748 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" | |
749 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | |
750 | |
751 <html xmlns="http://www.w3.org/1999/xhtml"> | |
752 <head> | |
753 <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" /> | |
754 <script src="http://code.jquery.com/jquery-1.10.2.min.js"></script> | |
755 <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script> | |
756 <script src="http://code.jquery.com/ui/1.10.3/jquery-ui.js"></script> | |
757 <script src="lightbox/js/jquery-1.10.2.min.js"></script> | |
758 <script src="lightbox/js/lightbox-2.6.min.js"></script> | |
759 <link href="lightbox/css/lightbox.css" rel="stylesheet" /> | |
760 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
761 <title>Dual coding result file</title> | |
762 <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" /> | |
763 <script> | |
764 (function(d){d.fn.stupidtable=function(b){return this.each(function(){var a=d(this);b=b||{};b=d.extend({},d.fn.stupidtable.default_sort_fns,b);var n=function(a,b){for(var f=[],c=0,e=a.slice(0).sort(b),h=0;h<a.length;h++){for(c=d.inArray(a[h],e);-1!=d.inArray(c,f);)c++;f.push(c)}return f},q=function(a,b){for(var d=a.slice(0),c=0,e=0;e<b.length;e++)c=b[e],d[c]=a[e];return d};a.on("click","th",function(){var m=a.children("tbody").children("tr"),g=d(this),f=0,c=d.fn.stupidtable.dir;a.find("th").slice(0, g.index()).each(function(){var a=d(this).attr("colspan")||1;f+=parseInt(a,10)});var e=g.data("sort-default")||c.ASC;g.data("sort-dir")&&(e=g.data("sort-dir")===c.ASC?c.DESC:c.ASC);var h=g.data("sort")||null;null!==h&&(a.trigger("beforetablesort",{column:f,direction:e}),a.css("display"),setTimeout(function(){var l=[],p=b[h];m.each(function(a,b){var c=d(b).children().eq(f),e=c.data("sort-value"),c="undefined"!==typeof e?e:c.text();l.push(c)});var k;k=e==c.ASC?n(l,p):n(l,function(a,b){return-p(a,b)}); a.find("th").data("sort-dir",null).removeClass("sorting-desc sorting-asc");g.data("sort-dir",e).addClass("sorting-"+e);k=d(q(m,k));a.children("tbody").remove();a.append("<tbody />").append(k);a.trigger("aftertablesort",{column:f,direction:e});a.css("display")},10))})})};d.fn.stupidtable.dir={ASC:"asc",DESC:"desc"};d.fn.stupidtable.default_sort_fns={"int":function(b,a){return parseInt(b,10)-parseInt(a,10)},"float":function(b,a){return parseFloat(b)-parseFloat(a)},string:function(b,a){return b<a?-1: b>a?1:0},"string-ins":function(b,a){b=b.toLowerCase();a=a.toLowerCase();return b<a?-1:b>a?1:0}}})(jQuery); | |
765 (function($) { | |
766 | |
767 $.fn.stupidtable = function(sortFns) { | |
768 return this.each(function() { | |
769 var $table = $(this); | |
770 sortFns = sortFns || {}; | |
771 | |
772 // ==================================================== // | |
773 // Utility functions // | |
774 // ==================================================== // | |
775 | |
776 // Merge sort functions with some default sort functions. | |
777 sortFns = $.extend({}, $.fn.stupidtable.default_sort_fns, sortFns); | |
778 | |
779 // Return the resulting indexes of a sort so we can apply | |
780 // this result elsewhere. This returns an array of index numbers. | |
781 // return[0] = x means "arr's 0th element is now at x" | |
782 var sort_map = function(arr, sort_function) { | |
783 var map = []; | |
784 var index = 0; | |
785 var sorted = arr.slice(0).sort(sort_function); | |
786 for (var i=0; i<arr.length; i++) { | |
787 index = $.inArray(arr[i], sorted); | |
788 | |
789 // If this index is already in the map, look for the next index. | |
790 // This handles the case of duplicate entries. | |
791 while ($.inArray(index, map) != -1) { | |
792 index++; | |
793 } | |
794 map.push(index); | |
795 } | |
796 | |
797 return map; | |
798 }; | |
799 | |
800 // Apply a sort map to the array. | |
801 var apply_sort_map = function(arr, map) { | |
802 var clone = arr.slice(0), | |
803 newIndex = 0; | |
804 for (var i=0; i<map.length; i++) { | |
805 newIndex = map[i]; | |
806 clone[newIndex] = arr[i]; | |
807 } | |
808 return clone; | |
809 }; | |
810 | |
811 // ==================================================== // | |
812 // Begin execution! // | |
813 // ==================================================== // | |
814 | |
815 // Do sorting when THs are clicked | |
816 $table.on("click", "th", function() { | |
817 var trs = $table.children("tbody").children("tr"); | |
818 var $this = $(this); | |
819 var th_index = 0; | |
820 var dir = $.fn.stupidtable.dir; | |
821 | |
822 $table.find("th").slice(0, $this.index()).each(function() { | |
823 var cols = $(this).attr("colspan") || 1; | |
824 th_index += parseInt(cols,10); | |
825 }); | |
826 | |
827 // Determine (and/or reverse) sorting direction, default `asc` | |
828 var sort_dir = $this.data("sort-default") || dir.ASC; | |
829 if ($this.data("sort-dir")) | |
830 sort_dir = $this.data("sort-dir") === dir.ASC ? dir.DESC : dir.ASC; | |
831 | |
832 // Choose appropriate sorting function. | |
833 var type = $this.data("sort") || null; | |
834 | |
835 // Prevent sorting if no type defined | |
836 if (type === null) { | |
837 return; | |
838 } | |
839 | |
840 // Trigger `beforetablesort` event that calling scripts can hook into; | |
841 // pass parameters for sorted column index and sorting direction | |
842 $table.trigger("beforetablesort", {column: th_index, direction: sort_dir}); | |
843 // More reliable method of forcing a redraw | |
844 $table.css("display"); | |
845 | |
846 // Run sorting asynchronously on a timout to force browser redraw after | |
847 // `beforetablesort` callback. Also avoids locking up the browser too much. | |
848 setTimeout(function() { | |
849 // Gather the elements for this column | |
850 var column = []; | |
851 var sortMethod = sortFns[type]; | |
852 | |
853 // Push either the value of the `data-order-by` attribute if specified | |
854 // or just the text() value in this column to column[] for comparison. | |
855 trs.each(function(index,tr) { | |
856 var $e = $(tr).children().eq(th_index); | |
857 var sort_val = $e.data("sort-value"); | |
858 var order_by = typeof(sort_val) !== "undefined" ? sort_val : $e.text(); | |
859 column.push(order_by); | |
860 }); | |
861 | |
862 // Create the sort map. This column having a sort-dir implies it was | |
863 // the last column sorted. As long as no data-sort-desc is specified, | |
864 // we're free to just reverse the column. | |
865 var theMap; | |
866 if (sort_dir == dir.ASC) | |
867 theMap = sort_map(column, sortMethod); | |
868 else | |
869 theMap = sort_map(column, function(a, b) { return -sortMethod(a, b); }); | |
870 | |
871 // Reset siblings | |
872 $table.find("th").data("sort-dir", null).removeClass("sorting-desc sorting-asc"); | |
873 $this.data("sort-dir", sort_dir).addClass("sorting-"+sort_dir); | |
874 | |
875 var sortedTRs = $(apply_sort_map(trs, theMap)); | |
876 $table.children("tbody").remove(); | |
877 $table.append("<tbody />").append(sortedTRs); | |
878 | |
879 // Trigger `aftertablesort` event. Similar to `beforetablesort` | |
880 $table.trigger("aftertablesort", {column: th_index, direction: sort_dir}); | |
881 // More reliable method of forcing a redraw | |
882 $table.css("display"); | |
883 }, 10); | |
884 }); | |
885 }); | |
886 }; | |
887 | |
888 // Enum containing sorting directions | |
889 $.fn.stupidtable.dir = {ASC: "asc", DESC: "desc"}; | |
890 | |
891 $.fn.stupidtable.default_sort_fns = { | |
892 "int": function(a, b) { | |
893 return parseInt(a, 10) - parseInt(b, 10); | |
894 }, | |
895 "float": function(a, b) { | |
896 return parseFloat(a) - parseFloat(b); | |
897 }, | |
898 "string": function(a, b) { | |
899 if (a < b) return -1; | |
900 if (a > b) return +1; | |
901 return 0; | |
902 }, | |
903 "string-ins": function(a, b) { | |
904 a = a.toLowerCase(); | |
905 b = b.toLowerCase(); | |
906 if (a < b) return -1; | |
907 if (a > b) return +1; | |
908 return 0; | |
909 } | |
910 }; | |
911 | |
912 })(jQuery); | |
913 $(function(){ | |
914 var table = $("table").stupidtable(); | |
915 | |
916 table.on("beforetablesort", function (event, data) { | |
917 // data.column - the index of the column sorted after a click | |
918 // data.direction - the sorting direction (either asc or desc) | |
919 $("#msg").text("Sorting index " + data.column) | |
920 }); | |
921 table.on("aftertablesort", function (event, data) { | |
922 var th = $(this).find("th"); | |
923 th.find(".arrow").remove(); | |
924 var dir = $.fn.stupidtable.dir; | |
925 var arrow = data.direction === dir.ASC ? "↑" : "↓"; | |
926 th.eq(data.column).append('<span class="arrow">' + arrow +'</span>'); | |
927 }); | |
928 }); | |
929 </script> | |
930 <style type="text/css"> | |
931 label { | |
932 display: inline-block; | |
933 width: 5em; | |
934 } | |
935 table { | |
936 border-collapse: collapse; | |
937 } | |
938 th, td { | |
939 padding: 5px 10px; | |
940 border: 1px solid #999; | |
941 } | |
942 th { | |
943 background-color: #a7d3ff; | |
944 } | |
945 th[data-sort]{ | |
946 cursor:pointer; | |
947 } | |
948 a[data-lightbox]{ | |
949 cursor:zoom-in; | |
950 } | |
951 #msg { | |
952 color: green; | |
953 } | |
954 </style> | |
955 </head> | |
956 <body> | |
957 <h1>Readthrough analyse results</h1> | |
958 %s | |
959 </body> | |
960 </html> """ % (gene_table) | |
961 | |
962 html_file = open(html,"w") | |
963 html_file.write(html_str) | |
964 html_file.close() | |
965 | |
966 | |
967 except Exception, e : | |
968 stop_err('Error during html page creation : ' + str( e ) ) | |
969 | |
970 | |
971 def __main__(): | |
972 | |
973 ## python metagene_readthrough.py -g Saccer3.gff -f Saccer3.fa -b B1_sorted.bam -o Bstrain_readthrough.csv | |
974 #python /home/rlegendre/galaxy/galaxy-dist/tools/rib_profiling/metagene_readthrough.py -g /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.gff -f /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.fa -b /data/Mapping/read_mapped_unique_psiP_sorted.bam -o /home/rlegendre/Documents/Translecture/plop_py.csv -d /home/rlegendre/Documents/Translecture/out_trans/ | |
975 #python /home/rlegendre/galaxy/galaxy-dist/tools/rib_profiling/metagene_readthrough.py -g /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.gff -f /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.fa -b /home/rlegendre/Documents/PDE2S/PDE2S_trim_no_rRNA_sorted.bam -o /home/rlegendre/Documents/PDE2S/Readthrough_pde2s.csv -d /home/rlegendre/Documents/PDE2S/out_trans | |
976 | |
977 | |
978 #Parse command line options | |
979 parser = optparse.OptionParser() | |
980 parser.add_option("-g", "--gff", dest="gff", type= "string", | |
981 help="GFF annotation file", metavar="FILE") | |
982 | |
983 parser.add_option("-f", "--fasta", dest="fasta", type= "string", | |
984 help="Fasta file ", metavar="FILE") | |
985 | |
986 parser.add_option("-b", "--bam", dest="bamfile", type= "string", | |
987 help="Bam Ribo-Seq alignments ", metavar="FILE") | |
988 | |
989 parser.add_option("-d", "--dirout", dest="dirout", type= "string", | |
990 help="write report in this html file and in associated directory", metavar="STR,STR") | |
991 | |
992 parser.add_option("-q", "--quiet", | |
993 action="store_false", dest="verbose", default=True, | |
994 help="don't print status messages to stdout") | |
995 | |
996 (options, args) = parser.parse_args() | |
997 sys.stdout.write("Begin readthrough analysis at %s\n" % time.asctime( time.localtime(time.time()))) | |
998 | |
999 try: | |
1000 (html_file, subfolder ) = options.dirout.split(",") | |
1001 if os.path.exists(subfolder): | |
1002 raise | |
1003 try: | |
1004 os.mkdir(subfolder) | |
1005 except: | |
1006 raise | |
1007 | |
1008 GFF = store_gff(options.gff) | |
1009 clean_file = cleaning_bam(options.bamfile) | |
1010 compute_analysis(clean_file, GFF, options.fasta, options.gff, subfolder) | |
1011 if os.path.exists( clean_file ): | |
1012 os.remove( clean_file ) | |
1013 | |
1014 write_html_page(html_file,subfolder) | |
1015 ##paste jquery script in result directory : | |
1016 jq_src = os.path.join(os.path.dirname(__file__),'lightbox') | |
1017 shutil.copytree(jq_src,os.path.join(subfolder,'lightbox')) | |
1018 | |
1019 | |
1020 sys.stdout.write("Finish readthrough analysis at %s\n" % time.asctime( time.localtime(time.time()))) | |
1021 except Exception, e: | |
1022 stop_err( 'Error running metagene readthrough analysis : ' + str( e ) ) | |
1023 | |
1024 | |
1025 if __name__=="__main__": | |
1026 __main__() |