annotate metagene_readthrough.py @ 10:707807fee542

(none)
author rlegendre
date Thu, 22 Jan 2015 14:34:38 +0100
parents 29c9c86e17e1
children 7c944fd9907e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1 #!/usr/bin/env python2.7.3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
2 #-*- coding: utf-8 -*-
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
4 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
5 Created on Dec. 2013
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
6 @author: rachel legendre
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
7 @copyright: rachel.legendre@igmors.u-psud.fr
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
8 @license: GPL v3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
9 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
10
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
11 import os, sys, time, optparse, shutil, re, urllib, subprocess, tempfile
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
12 from urllib import unquote
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
13 from Bio import SeqIO
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
14 import csv
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
15 import pysam
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
16 import HTSeq
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
17 #from matplotlib import pyplot as pl
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
18 import matplotlib
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
19 matplotlib.use('Agg')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
20 import matplotlib.pyplot as pl
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
21 from numpy import arange
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
22 from matplotlib import ticker as t
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
23 from PIL import Image
10
rlegendre
parents: 6
diff changeset
24 import ribo_functions
rlegendre
parents: 6
diff changeset
25 ## suppress matplotlib warnings
rlegendre
parents: 6
diff changeset
26 import warnings
rlegendre
parents: 6
diff changeset
27
rlegendre
parents: 6
diff changeset
28
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
29
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
30 def stop_err( msg ):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
31 sys.stderr.write( "%s\n" % msg )
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
32 sys.stderr.write( "Programme aborted at %s\n" % time.asctime(time.localtime(time.time())))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
33 sys.exit()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
34
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
35
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
36 def compute_rpkm(length,count_gene,count_tot) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
37
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
38 try :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
39 rpkm = "{0:.4f}".format(count_gene*1000000.0/(count_tot*length))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
40 except ArithmeticError :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
41 stop_err( 'Illegal division by zero')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
42 return float(rpkm)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
43
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
44
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
45 def find_stop(seq) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
46 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
47 Find stop codon in a sequence and return position of first nucleotide in stop
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
48 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
49 stop_codon = ['TAA','TAG','TGA']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
50 for nt in range(0,len(seq)-3,3) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
51 codon = seq[nt:nt+3]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
52 if codon in stop_codon :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
53 return nt
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
54
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
55 return -1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
56
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
57
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
58 def check_met(seq):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
59 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
60 Boolean function for testing presence or absence of methionine in 5 codons following stop codon
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
61 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
62 met = 'ATG'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
63 for pos in range(0,15,3) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
64 codon = seq[pos:pos+3]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
65 if codon in met :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
66 return True
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
67
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
68 return False
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
69
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
70
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
71 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
72 feature.iv is a GenomicInterval object :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
73 A GenomicInterval object has the following slots, some of which
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
74 are calculated from the other:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
75
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
76 chrom: The name of a sequence (i.e., chromosome, contig, or
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
77 the like).
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
78 start: The start of the interval. Even on the reverse strand,
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
79 this is always the smaller of the two values 'start' and 'end'.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
80 Note that all positions should be given as 0-based value!
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
81 end: The end of the interval. Following Python convention for
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
82 ranges, this in one more than the coordinate of the last base
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
83 that is considered part of the sequence.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
84 strand: The strand, as a single character, '+' or '-'. '.' indicates
10
rlegendre
parents: 6
diff changeset
85 that the strand is irrelevant. (Alternatively, pass a Strand object.)
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
86 length: The length of the interval, i.e., end - start
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
87 start_d: The "directional start" position. This is the position of the
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
88 first base of the interval, taking the strand into account. Hence,
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
89 this is the same as 'start' except when strand == '-', in which
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
90 case it is end-1.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
91 end_d: The "directional end": Usually, the same as 'end', but for
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
92 strand=='-1', it is start-1.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
93
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
94 '''
10
rlegendre
parents: 6
diff changeset
95 def check_overlapping(gff_reader,chrm,start,stop,strand, name):
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
96
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
97 #### probleme avec les genes completement inclu...
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
98
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
99 iv2 = HTSeq.GenomicInterval(chrm,start,stop,strand)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
100 for feature in gff_reader:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
101 if feature.type == "gene" :
10
rlegendre
parents: 6
diff changeset
102 if feature.iv.overlaps(iv2) and feature.attr.get('gene_name') != name :
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
103 ## if its a reverse gene, we replace start of extension by start of previous gene
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
104 if strand == '-' :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
105 return (feature.iv.end+3,stop)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
106 ## else we replace stop of extension by start of following gene
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
107 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
108 return (start,feature.iv.start-3)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
109 ## if no overlap are find, we return -1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
110 return (start,stop)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
111
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
112
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
113 def pass_length(start,stop) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
114
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
115 if (stop-start) > 25 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
116 return True
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
117 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
118 return False
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
119
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
120
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
121 def check_homo_coverage(gene,GFF,start,stop, aln) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
122
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
123 chrom = GFF[gene]['chrom']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
124 ## compute nunber of nucleotides in CDS with a coverage equal to zero
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
125 nt_cds_cov = 0
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
126 nt_cds_num = 0
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
127 for i in range(1,GFF[gene]['exon_number']+1) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
128 for z in range(GFF[gene]['exon'][i]['start'],GFF[gene]['exon'][i]['stop']):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
129 nt_cds_num += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
130 if aln.count(chrom,z,z+1) == 0 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
131 nt_cds_cov += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
132
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
133 ## compute percent of CDS no covering
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
134 percent = nt_cds_cov*100/nt_cds_num
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
135
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
136 ## compute number of nucleotides with no coverage in extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
137 nt_no_cover = 0
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
138 for pos in range(start,stop,1) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
139 if aln.count(chrom,pos,pos+1) == 0 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
140 nt_no_cover += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
141 #print gene, nt_cds_cov, percent, nt_no_cover
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
142 #percent10 = (stop-start)*50/100
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
143 if (nt_no_cover*100)/(stop-start) > percent :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
144 return False
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
145 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
146 return True
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
147
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
148 def plot_gene ( aln, gene, start_extension, stop_extension, dirout ) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
149
10
rlegendre
parents: 6
diff changeset
150
rlegendre
parents: 6
diff changeset
151 ### ignore matplotlib warnings for galaxy
rlegendre
parents: 6
diff changeset
152 warnings.filterwarnings('ignore')
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
153 try:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
154 strand = gene['strand']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
155 len_gene = gene['stop']-gene['start']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
156 if strand is "-" :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
157 len_ext = gene['stop']-start_extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
158 ## coverage in all gene + extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
159 start = start_extension-100
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
160 stop = gene['stop']+100
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
161 vector1 = [0]*(stop-start)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
162 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
163 # vector.append(pileupcolumn.n)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
164 for read in aln.fetch(gene['chrom'], start, stop):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
165 if read.is_reverse :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
166 ## for get footprint in P-site (estimate) we take 3 nt in middle of read
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
167 #pos = (read.pos+13)-start
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
168 pos = (read.pos-start) + (read.rlen/2)-1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
169 for z in range(pos,(pos+3)) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
170 ## le fetch contient des reads qui chevauchent les 30 nt de la fin du gene mais dont le site P
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
171 ## se trouve avant notre vector, le z devient negatif et la couverture augmente en fin de vecteur (ce qui est faux)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
172 if z > 0 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
173 try :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
174 vector1[z] += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
175 except IndexError :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
176 pass
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
177 vector1.reverse()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
178 mean_read = float(sum(vector1))/float(len(vector1))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
179 cov = [(x/mean_read) for x in vector1]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
180 idx_tot = arange(len(cov))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
181 ## coverage in extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
182 start_ext = start_extension-40
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
183 stop_ext = stop_extension+30
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
184 vector2 = [0]*(stop_ext-start_ext)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
185 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
186 # vector.append(pileupcolumn.n)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
187 for read in aln.fetch(gene['chrom'], start_extension, stop_ext):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
188 if read.is_reverse :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
189 ## get footprint in P-site
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
190 #pos = (read.pos+13)-start_ext
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
191 pos = (read.pos-start_ext) + (read.rlen/2)-1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
192 for z in range(pos,(pos+3)) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
193 if z > 0 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
194 try :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
195 vector2[z] += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
196 except IndexError :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
197 pass
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
198 vector2.reverse()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
199 #mean_read = float(sum(vector))/float(len(vector))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
200 cov_ext = [(x/mean_read) for x in vector2]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
201 _max = max(cov_ext[30::])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
202 idx_ext = arange(len(cov_ext))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
203
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
204 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
205 len_ext = stop_extension-gene['start']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
206 start = gene['start']-100
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
207 stop = stop_extension+100
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
208 vector = [0]*(stop-start)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
209 #for pileupcolumn in aln.pileup( gene['chrom'], start, stop):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
210 #vector.append(pileupcolumn.n)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
211 for read in aln.fetch(gene['chrom'], start, stop):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
212 if not read.is_reverse :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
213 ## get footprint in P-site
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
214 #pos = (read.pos+12)-start
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
215 pos = (read.pos-start) + (read.rlen/2)-1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
216 for z in range(pos,(pos+3)) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
217 if z > 0 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
218 try :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
219 vector[z] += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
220 except IndexError :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
221 pass
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
222 mean_read = float(sum(vector))/float(len(vector))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
223 cov = [(x/mean_read) for x in vector]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
224 idx_tot = arange(len(cov))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
225
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
226 ## coverage in extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
227 start_ext = gene['stop']-30
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
228 stop_ext = stop_extension+40
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
229 vector = [0]*(stop-start_ext)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
230 for read in aln.fetch(gene['chrom'], start_ext, stop_extension):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
231 if not read.is_reverse :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
232 ## get footprint in P-site
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
233 pos = (read.pos-start_ext) + (read.rlen/2)-1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
234 for z in range(pos,(pos+3)) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
235 if z > 0 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
236 try :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
237 vector[z] += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
238 except IndexError :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
239 pass
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
240
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
241 cov_ext = [(x/mean_read) for x in vector]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
242 idx_ext = arange(len(cov_ext))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
243 _max = max(cov_ext[30::])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
244
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
245 #### PLOT FIGURE ####
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
246
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
247 font = {'family' : 'serif','color': 'grey','weight' : 'normal','size' : 16 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
248
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
249
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
250 fig = pl.figure(num=1)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
251 ## create a big subplot for common y axis on two last subplot
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
252 ax = fig.add_subplot(2,1,2)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
253 ## hide all spines
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
254 ax.spines['top'].set_visible(False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
255 ax.spines['bottom'].set_visible(False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
256 ax.spines['left'].set_visible(False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
257 ax.spines['right'].set_visible(False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
258 ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
259
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
260 ## plot gene structure
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
261 ax1 = fig.add_subplot(3,1,1)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
262 ax1.set_title(gene['name'])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
263 ## hide all spines
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
264 ax1.spines['right'].set_color('none')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
265 ax1.spines['top'].set_color('none')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
266 ax1.spines['left'].set_color('none')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
267 ax1.spines['bottom'].set_color('none')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
268 ax1.set_ylim(0,5)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
269 #set xlim as second plot with 100nt switch
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
270 ax1.set_xlim(-100,len(idx_tot)-100)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
271 ## hide ticks
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
272 pl.yticks([])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
273 pl.xticks([])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
274 ## if it's a "simple" gene
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
275 if gene['exon_number'] == 1 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
276 exon = arange(len_gene)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
277 x = [3]*len_gene
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
278 ax1.plot(exon,x,color='#9B9E9C')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
279 ax1.fill_between(exon,2,x,color='#9B9E9C')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
280 ## if it's a gene with introns
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
281 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
282 ## plot a line representing intron
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
283 intron = arange(len_gene)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
284 y = [2.5]*len_gene
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
285 ax1.plot(intron,y,color='#9B9E9C')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
286
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
287 ## plotting each intron
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
288 start = gene['start']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
289 if strand == '+' :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
290 for exon in range(1,gene['exon_number']+1,1) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
291 len_exon = gene['exon'][exon]['stop']-gene['exon'][exon]['start']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
292 idx = gene['exon'][exon]['start'] - start
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
293 exo = arange(idx,idx+len_exon)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
294 x = [3]*len_exon
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
295 ax1.plot(exo,x,color='#9B9E9C')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
296 ax1.fill_between(exo,2,x,color='#9B9E9C')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
297 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
298 ## if it's a reverse gene we must reverse exon's position
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
299 start = gene['start']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
300 tabF = [2.5]*len_gene
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
301 tabR = [2.5]*len_gene
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
302 for exon in range(1,gene['exon_number']+1,1) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
303 len_exon = gene['exon'][exon]['stop']-gene['exon'][exon]['start']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
304 idx = gene['exon'][exon]['start'] - start
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
305 exo = arange(idx,idx+len_exon)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
306 for z in exo :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
307 tabF[z] = 3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
308 tabR[z] = 2
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
309 tabF.reverse()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
310 tabR.reverse()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
311 #pl.ylim(0,5)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
312 ax1.plot(tabF,color='#9B9E9C')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
313 ax1.plot(tabR,color='#9B9E9C')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
314 x = arange(len(tabR))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
315 ax1.fill_between(x,tabF,tabR,color='#9B9E9C')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
316
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
317 ## insert arrows (as genome browser representation)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
318 yloc = 2.5
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
319 narrows = 20
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
320 exonwidth = .8
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
321 spread = .4 * len_gene / narrows
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
322 for i in range(narrows):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
323 loc = (float(i) * len_gene / narrows)+ (len(idx_tot)/100)*2
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
324 if strand == '+' :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
325 x = [loc - spread, loc, loc - spread]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
326 else:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
327 x = [loc - spread, loc, loc - spread]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
328 y = [yloc - exonwidth / 5, yloc, yloc + exonwidth / 5]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
329 ax1.plot(x, y, lw=1.4, color='#676B69')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
330
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
331
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
332 # plot coverage in all gene + extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
333 ax2 = fig.add_subplot(3,1,2)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
334 ## fixe limit to length of coverage for x axis
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
335 ax2.set_xlim(0,len(idx_tot))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
336 ## fixe 4 ticks and associated labels for y axis
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
337 ax2.set_yticklabels(arange(0,int(max(cov)+20),int((max(cov)+20)/4)))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
338 ax2.set_yticks(arange(0,int(max(cov)+20),int((max(cov)+20)/4)))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
339 ### add start and stop of gene in axe in place of number
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
340 ax2.set_xticks([100,(100+len_gene),(100+len_ext)])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
341 ax2.set_xticklabels(["{:,}".format(gene['start']),"{:,}".format(gene['stop']),""])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
342 ## hide spines and any axis
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
343 ax2.spines['right'].set_visible(False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
344 ax2.spines['top'].set_visible(False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
345 ax2.yaxis.set_ticks_position('left')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
346 ax2.xaxis.set_ticks_position('bottom')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
347 ## plot and fill
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
348 ax2.plot(idx_tot, cov, color="#CC0011")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
349 ax2.fill_between(idx_tot, 0,cov,color="#CC0011")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
350 ax2.set_title("Genomic coordinates (%s,%s)"% (gene['strand'],gene['chrom']) ,fontdict={'fontsize':'small'})
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
351
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
352 # plot zoom coverage in extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
353 ax3 = fig.add_subplot(3,1,3)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
354 ## fixe limit to length of coverage for x axis
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
355 ax3.set_ylim(0,int(_max))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
356 # we hide spines
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
357 ax3.spines['right'].set_visible(False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
358 ax3.spines['top'].set_visible(False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
359 ax3.yaxis.set_ticks_position('left')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
360 ax3.xaxis.set_ticks_position('bottom')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
361 ## add stop and in-frame stop in x axis
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
362 pl.xticks([30,( stop_extension-start_extension)+30],["stop codon","next in-frame stop"])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
363 ## fixe good position for labels
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
364 (ax3.get_xticklabels()[0]).set_horizontalalignment('center')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
365 (ax3.get_xticklabels()[1]).set_horizontalalignment('left')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
366 #if max coverage is lower than 2, we have a illegal division by zero
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
367 if _max > 2 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
368 ax3.set_yticklabels(arange(0,int(_max+1),int((_max+1)/3)))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
369 ax3.set_yticks(arange(0,int(_max+1),int((_max+1)/3)))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
370 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
371 ##
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
372 ax3.set_ylim(0,_max)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
373 ax3.ticklabel_format(style='sci', scilimits=(-2,2), axis='y',useOffset=False)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
374
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
375 ax3.plot(idx_ext, cov_ext, color="#CC0011")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
376 ax3.fill_between(idx_ext, 0,cov_ext,color="#CC0011")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
377
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
378
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
379 ## get scale of subplot 3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
380 #ax3.text(ax3.get_xlim()[1]-50,ax3.get_ylim()[1]-1, r'50 nt', fontdict=font)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
381 #pl.arrow( ax3.get_xlim()[1]-50, ax3.get_ylim()[1]-2, ax3.get_xlim()[1]-50, 0, fc="grey", ec="grey",lw=2)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
382
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
383 ## set common y label
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
384 ax.set_ylabel('Normalized footprint counts',labelpad=20)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
385
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
386 ## draw and save plot
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
387 pl.draw()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
388 #pl.show()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
389 pl.savefig(dirout+".png",format='png', dpi=300)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
390 pl.clf()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
391
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
392
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
393 ## Make thumbnail for html page
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
394 infile = dirout+'.png'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
395 size = 128, 128
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
396 im = Image.open(infile)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
397 im.thumbnail(size, Image.ANTIALIAS)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
398 im.save(dirout+"_thumbnail.png", "PNG")
10
rlegendre
parents: 6
diff changeset
399 warnings.resetwarnings()
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
400
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
401 except Exception, e:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
402 stop_err( 'Error during gene plotting : ' + str( e ) )
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
403
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
404
10
rlegendre
parents: 6
diff changeset
405 def compute_analysis(bam, GFF, fasta, gff, dirout, extend) :
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
406
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
407 try:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
408 background_file = dirout+"/background_sequence.fasta"
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
409 file_back = open(background_file, 'w')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
410 file_context = open(dirout+"/stop_context.fasta", 'w')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
411 file_extension = open(dirout+"/extensions.fasta", 'w')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
412 ## Opening Bam file with pysam librarie
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
413 pysam.index(bam)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
414 aln = pysam.Samfile(bam, "rb",header=True, check_header = True)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
415 ## Opening fasta file in a dict with BioPython
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
416 SeqDict = SeqIO.to_dict(SeqIO.parse(open(fasta,"r"),"fasta"))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
417
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
418 ## count total read in bam file
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
419 cmd = "samtools view -c %s " % (bam)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
420 proc = subprocess.Popen( args=cmd, shell=True, stdout = subprocess.PIPE)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
421 count_tot = int(proc.stdout.read().rstrip())
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
422 returncode = proc.wait()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
423
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
424 ## opening a GFF reader for check overlapping
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
425 gff_reader = HTSeq.GFF_Reader(gff)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
426
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
427 with open(dirout+"/readthrough_result.csv","w") as out :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
428 myheader = ['Gene','Name', 'FAIL', 'Stop context','chrom','start extension','stop extension','length extension','RPKM CDS', 'RPKM extension','ratio','Annotation','sequence']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
429 writer = csv.writer(out,delimiter='\t')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
430 writer.writerow(myheader)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
431 for gene in GFF['order'] :
10
rlegendre
parents: 6
diff changeset
432 #print GFF[gene]
rlegendre
parents: 6
diff changeset
433 ## maybe no start position in GTF file so we must to check and replace
rlegendre
parents: 6
diff changeset
434 exon_number = GFF[gene]['exon_number']
rlegendre
parents: 6
diff changeset
435 try : GFF[gene]['start']
rlegendre
parents: 6
diff changeset
436 except :
rlegendre
parents: 6
diff changeset
437 if GFF[gene]['strand'] == '+' :
rlegendre
parents: 6
diff changeset
438 GFF[gene]['start'] = GFF[gene]['exon'][1]['start']
rlegendre
parents: 6
diff changeset
439 else :
rlegendre
parents: 6
diff changeset
440 GFF[gene]['start'] = GFF[gene]['exon'][exon_number]['stop']
rlegendre
parents: 6
diff changeset
441 ## also for stop coordinates
rlegendre
parents: 6
diff changeset
442 try : GFF[gene]['stop']
rlegendre
parents: 6
diff changeset
443 except :
rlegendre
parents: 6
diff changeset
444 if GFF[gene]['strand'] == '+' :
rlegendre
parents: 6
diff changeset
445 GFF[gene]['stop'] = GFF[gene]['exon'][exon_number]['stop']
rlegendre
parents: 6
diff changeset
446
rlegendre
parents: 6
diff changeset
447 else :
rlegendre
parents: 6
diff changeset
448 GFF[gene]['stop'] = GFF[gene]['exon'][1]['start']
rlegendre
parents: 6
diff changeset
449
rlegendre
parents: 6
diff changeset
450
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
451 indic = ""
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
452 # compute rpkm of CDS :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
453 len_cds = GFF[gene]['stop']-GFF[gene]['start']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
454 count_cds = 0
10
rlegendre
parents: 6
diff changeset
455 rpkm_cds = 0
rlegendre
parents: 6
diff changeset
456 count_cds = 0
rlegendre
parents: 6
diff changeset
457 try :
rlegendre
parents: 6
diff changeset
458 ### count method of pysam doesn't strand information
rlegendre
parents: 6
diff changeset
459 if GFF[gene]['strand'] == '+' :
rlegendre
parents: 6
diff changeset
460 for track in aln.fetch(GFF[gene]['chrom'],GFF[gene]['start']+12,GFF[gene]['stop']-15) :
rlegendre
parents: 6
diff changeset
461 if not track.is_reverse :
rlegendre
parents: 6
diff changeset
462 count_cds += 1
rlegendre
parents: 6
diff changeset
463 else :
rlegendre
parents: 6
diff changeset
464 for track in aln.fetch(GFF[gene]['chrom'],GFF[gene]['start']+15,GFF[gene]['stop']-12) :
rlegendre
parents: 6
diff changeset
465 if track.is_reverse :
rlegendre
parents: 6
diff changeset
466 count_cds += 1
rlegendre
parents: 6
diff changeset
467 rpkm_cds = compute_rpkm(len_cds,count_cds,count_tot)
rlegendre
parents: 6
diff changeset
468 except ValueError:
rlegendre
parents: 6
diff changeset
469 ## genere warning about gtf coordinates
rlegendre
parents: 6
diff changeset
470 #warnings.warn("Please check coordinates in GFF : maybe stop or start codon are missing" )
rlegendre
parents: 6
diff changeset
471 pass
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
472 ## Only if gene is translate :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
473 if rpkm_cds > 0 and count_cds > 128:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
474 ## search footprint in UTR3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
475 count = 0
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
476 try :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
477 if GFF[gene]['strand'] == '+' :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
478 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['stop']-6:GFF[gene]['stop']+6])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
479 #print gene, contexte_stop
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
480 start_extension = GFF[gene]['stop']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
481 stop_extension = GFF[gene]['stop']+90
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
482 for track in aln.fetch(GFF[gene]['chrom'],start_extension+15,stop_extension) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
483 if not track.is_reverse :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
484 count += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
485
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
486 ## get sequence of extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
487 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
488
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
489 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
490 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['start']-7:GFF[gene]['start']+5].reverse_complement())
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
491 #print gene, contexte_stop
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
492 start_extension = GFF[gene]['start']-90
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
493 stop_extension = GFF[gene]['start']
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
494 for track in aln.fetch(GFF[gene]['chrom'],start_extension,stop_extension-15) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
495 if track.is_reverse :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
496 count += 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
497 ## get sequence of extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
498 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension-1].reverse_complement())
10
rlegendre
parents: 6
diff changeset
499
rlegendre
parents: 6
diff changeset
500
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
501 ## if we have coverage after cds stop codon
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
502 if count > 10 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
503 res = find_stop(seq)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
504 if res == -1 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
505 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
506 Write results with no stop but RPF in extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
507 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
508 ## check if next in-frame codon is far than 90 nt extension :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
509 if GFF[gene]['strand'] == '+' :
10
rlegendre
parents: 6
diff changeset
510 pos = check_overlapping(gff_reader,GFF[gene]['chrom'],GFF[gene]['stop']+1,GFF[gene]['stop']+extend,'+',GFF[gene]['name'])
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
511 start_extension = pos[0]-1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
512 stop_extension = pos[1]
10
rlegendre
parents: 6
diff changeset
513 #start_extension = GFF[gene]['stop']
rlegendre
parents: 6
diff changeset
514 #stop_extension = GFF[gene]['stop']+extend
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
515 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
516
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
517 #print gene,count,pos,'\n',seq
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
518
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
519 if (seq):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
520 res = find_stop(seq)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
521 if res == -1 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
522 mylist = [gene,GFF[gene]['name'],'no stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
523 writer.writerow(mylist)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
524 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
525 indic = 'ok'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
526 #print res
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
527 #stop_extension = start_extension + res +3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
528 else :
10
rlegendre
parents: 6
diff changeset
529 pos = check_overlapping(gff_reader,GFF[gene]['chrom'],GFF[gene]['start']-extend,GFF[gene]['start']-1,'-',GFF[gene]['name'])
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
530 start_extension = pos[0]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
531 stop_extension = pos[1]+1
10
rlegendre
parents: 6
diff changeset
532 #start_extension = GFF[gene]['start']-extend
rlegendre
parents: 6
diff changeset
533 #stop_extension = GFF[gene]['start']
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
534 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension-1].reverse_complement())
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
535
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
536 #print gene,count,pos,'\n',seq
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
537
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
538 if (seq):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
539 res = find_stop(seq)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
540 if res == -1 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
541 mylist = [gene,GFF[gene]['name'],'no stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
542 writer.writerow(mylist)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
543 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
544 indic = 'ok'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
545 #print res
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
546 #start_extension = stop_extension - res -3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
547 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
548 indic = 'ok'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
549
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
550
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
551 if indic == 'ok' :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
552 ## We save new coordinates
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
553 if GFF[gene]['strand'] == '+' :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
554 stop_extension = start_extension + res +3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
555 #print gene, count
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
556 #print gene,start_extension,stop_extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
557 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension:stop_extension])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
558 #print seq
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
559 count_stop = aln.count(GFF[gene]['chrom'],stop_extension-2,stop_extension+2)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
560 if pass_length(start_extension,stop_extension) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
561 count_ext = aln.count(GFF[gene]['chrom'],start_extension+9,stop_extension-15)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
562 if stop_extension > GFF[gene]['stop']+9 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
563 stop_ok = 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
564 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
565 stop_ok = 0
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
566 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
567 start_extension = stop_extension - res - 3
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
568 #print gene, count
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
569 #print gene,start_extension,stop_extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
570 seq = str(SeqDict[GFF[gene]['chrom']].seq[start_extension-1:stop_extension-1].reverse_complement())
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
571 #print seq
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
572 count_stop = aln.count(GFF[gene]['chrom'],start_extension-2,start_extension+2)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
573 if pass_length(start_extension,stop_extension) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
574 count_ext = aln.count(GFF[gene]['chrom'],start_extension+15,stop_extension-9)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
575 if start_extension < GFF[gene]['start']-9 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
576 stop_ok = 1
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
577 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
578 stop_ok = 0
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
579 ## if we are no methionine in 5 codons following stop of CDS
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
580 if (not check_met(seq) ):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
581 ## if we have footprint in stop codon extension and stop extension is further than cds_stop+9
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
582 if count_stop > 2 and stop_ok == 1 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
583 homo_cov = check_homo_coverage(gene,GFF,start_extension,stop_extension,aln)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
584 if (homo_cov) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
585 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
586 write result witch corresponding to readthrough
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
587 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
588 ##if length of extension upper than 25 we can compute rpkm
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
589 if (not pass_length(start_extension,stop_extension)) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
590 len_ext = stop_extension-start_extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
591 rpkm_ext = 'nan'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
592 ratio = 'nan'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
593 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
594 len_ext = stop_extension-start_extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
595 rpkm_ext = compute_rpkm(len_ext,count_ext,count_tot)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
596 ## compute ratio between extension coverage and cds coverage (rpkm)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
597 ratio = rpkm_ext/rpkm_cds
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
598 #print gene,ratio
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
599 #print start_extension,stop_extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
600 mylist = [gene,GFF[gene]['name'],'-',contexte_stop,GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,rpkm_ext,ratio,GFF[gene]['note'],seq]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
601 writer.writerow(mylist)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
602 file_context.write('>'+gene+'\n'+contexte_stop+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
603 file_extension.write('>'+gene+'\n'+seq+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
604 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
605 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
606 write result witch corresponding to readthrough but with no homogeneous coverage
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
607 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
608 if (not pass_length(start_extension,stop_extension)) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
609 len_ext = stop_extension-start_extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
610 rpkm_ext = 'nan'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
611 ratio = 'nan'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
612 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
613 len_ext = stop_extension-start_extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
614 rpkm_ext = compute_rpkm(len_ext,count_ext,count_tot)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
615 ## compute ratio between extension coverage and cds coverage (rpkm)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
616 ratio = rpkm_ext/rpkm_cds
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
617 mylist = [gene,GFF[gene]['name'],'hetero cov',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,rpkm_ext,ratio,GFF[gene]['note'],seq]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
618 writer.writerow(mylist)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
619 file_context.write('>'+gene+'\n'+contexte_stop+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
620 file_extension.write('>'+gene+'\n'+seq+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
621 #print ">"+gene+"\n"+contexte_stop
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
622
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
623 ## plot gene :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
624 plot_gene(aln, GFF[gene], start_extension, stop_extension, dirout+"/"+gene)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
625
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
626
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
627
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
628 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
629 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
630 write result with no footprint in stop codon of extension
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
631 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
632 mylist = [gene,GFF[gene]['name'],'no RPF in stop',contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
633 writer.writerow(mylist)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
634 file_context.write('>'+gene+'\n'+contexte_stop+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
635 file_extension.write('>'+gene+'\n'+seq+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
636 #print ">"+gene+"\n"+contexte_stop
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
637 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
638 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
639 write result with RPF maybe result of reinitiation on a start codon
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
640 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
641 if pass_length(start_extension,stop_extension) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
642 mylist = [gene,GFF[gene]['name'],'Met after stop', contexte_stop, GFF[gene]['chrom'], start_extension, stop_extension,stop_extension-start_extension,rpkm_cds,'-','-',GFF[gene]['note'],seq]
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
643 writer.writerow(mylist)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
644 file_context.write('>'+gene+'\n'+contexte_stop+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
645 file_extension.write('>'+gene+'\n'+seq+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
646 #print ">"+gene+"\n"+contexte_stop
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
647 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
648 ## if its not a interesting case, we get stop context of genes without readthrough
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
649 if GFF[gene]['strand'] == '+' :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
650 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['stop']-6:GFF[gene]['stop']+6])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
651 file_back.write(contexte_stop+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
652 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
653 contexte_stop = str(SeqDict[GFF[gene]['chrom']].seq[GFF[gene]['start']-7:GFF[gene]['start']+5].reverse_complement())
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
654 file_back.write(contexte_stop+'\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
655
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
656 ## excluded UT with incorrect positions
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
657 except ValueError:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
658 pass
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
659
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
660
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
661 file_context.close()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
662 file_back.close()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
663 file_extension.close()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
664 except Exception,e:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
665 stop_err( 'Error during computing analysis : ' + str( e ) )
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
666
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
667
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
668 def cleaning_bam(bam):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
669 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
670 Remove reads unmapped, non uniquely mapped and reads with length lower than 25 and upper than 32, and mapping quality upper than 12
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
671 '''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
672 try :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
673 header = subprocess.check_output(['samtools', 'view', '-H', bam], stderr= subprocess.PIPE)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
674 #header = results.split('\n')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
675
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
676 # check mapper for define multiple tag
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
677 if re.search('bowtie', header):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
678 multi_tag = "XS:i:"
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
679 elif re.search('bwa', header):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
680 multi_tag = "XT:A:M"
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
681 else :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
682 stop_err("No PG tag find in"+bam+". Please use bowtie or bwa for mapping")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
683
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
684 tmp_sam = tempfile.mktemp()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
685 cmd = "samtools view %s > %s" % (bam, tmp_sam)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
686 proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
687 returncode = proc.wait()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
688
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
689
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
690 with open(tempfile.mktemp(),'w') as out :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
691 out.write(header)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
692 with open(tmp_sam,'r') as sam :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
693 for line in sam :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
694 if multi_tag not in line and line.split('\t')[1] != '4' and int(line.split('\t')[4]) > 12 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
695 if len(line.split('\t')[9]) < 32 and len(line.split('\t')[9]) > 25 :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
696 out.write(line)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
697 bamfile = tempfile.mktemp()+'.bam'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
698 cmd = "samtools view -hSb %s > %s" % (out.name,bamfile)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
699 proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
700 returncode = proc.wait()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
701
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
702 return bamfile
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
703
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
704 except Exception,e:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
705 stop_err( 'Error during cleaning bam : ' + str( e ) )
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
706
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
707 def write_html_page(html,subfolder) :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
708
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
709
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
710 try :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
711
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
712 gene_table = ''
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
713 gene_table += '<table>'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
714 gene_table += '<thead><tr><th data-sort="string">Gene</th><th>Plot</th><th data-sort="string">Name</th><th>Stop context</th><th>Coordinates</th><th>RPKM CDS</th><th>RPKM extension</th><th data-sort="float">ratio</th><th>Extension</th></tr></thead><tbody>'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
715
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
716 with open(os.path.join(subfolder,'readthrough_result.csv'), 'rb') as csvfile:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
717 spamreader = csv.reader(csvfile, delimiter='\t')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
718 ## skip the header
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
719 next(spamreader, None)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
720 for row in spamreader:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
721 if row[2] == '-' :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
722 gene_table += '<tr><td>%s</td><td><a href="%s.png" data-lightbox="%s"><img src="%s_thumbnail.png" /></a></td><td><a title="%s">%s</a></td><td>%s</td><td>%s:%s-%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' %(row[0], row[0], row[0], row[0], row[11], row[1], row[3], row[4], row[5], row[6], row[8], row[9], row[10], row[12])
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
723
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
724 gene_table += '</tbody></table>'
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
725
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
726
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
727
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
728 html_str = """
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
729 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
730 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
731
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
732 <html xmlns="http://www.w3.org/1999/xhtml">
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
733 <head>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
734 <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" />
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
735 <script src="http://code.jquery.com/jquery-1.10.2.min.js"></script>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
736 <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
737 <script src="http://code.jquery.com/ui/1.10.3/jquery-ui.js"></script>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
738 <script src="lightbox/js/jquery-1.10.2.min.js"></script>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
739 <script src="lightbox/js/lightbox-2.6.min.js"></script>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
740 <link href="lightbox/css/lightbox.css" rel="stylesheet" />
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
741 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
742 <title>Dual coding result file</title>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
743 <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" />
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
744 <script>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
745 (function(d){d.fn.stupidtable=function(b){return this.each(function(){var a=d(this);b=b||{};b=d.extend({},d.fn.stupidtable.default_sort_fns,b);var n=function(a,b){for(var f=[],c=0,e=a.slice(0).sort(b),h=0;h<a.length;h++){for(c=d.inArray(a[h],e);-1!=d.inArray(c,f);)c++;f.push(c)}return f},q=function(a,b){for(var d=a.slice(0),c=0,e=0;e<b.length;e++)c=b[e],d[c]=a[e];return d};a.on("click","th",function(){var m=a.children("tbody").children("tr"),g=d(this),f=0,c=d.fn.stupidtable.dir;a.find("th").slice(0, g.index()).each(function(){var a=d(this).attr("colspan")||1;f+=parseInt(a,10)});var e=g.data("sort-default")||c.ASC;g.data("sort-dir")&&(e=g.data("sort-dir")===c.ASC?c.DESC:c.ASC);var h=g.data("sort")||null;null!==h&&(a.trigger("beforetablesort",{column:f,direction:e}),a.css("display"),setTimeout(function(){var l=[],p=b[h];m.each(function(a,b){var c=d(b).children().eq(f),e=c.data("sort-value"),c="undefined"!==typeof e?e:c.text();l.push(c)});var k;k=e==c.ASC?n(l,p):n(l,function(a,b){return-p(a,b)}); a.find("th").data("sort-dir",null).removeClass("sorting-desc sorting-asc");g.data("sort-dir",e).addClass("sorting-"+e);k=d(q(m,k));a.children("tbody").remove();a.append("<tbody />").append(k);a.trigger("aftertablesort",{column:f,direction:e});a.css("display")},10))})})};d.fn.stupidtable.dir={ASC:"asc",DESC:"desc"};d.fn.stupidtable.default_sort_fns={"int":function(b,a){return parseInt(b,10)-parseInt(a,10)},"float":function(b,a){return parseFloat(b)-parseFloat(a)},string:function(b,a){return b<a?-1: b>a?1:0},"string-ins":function(b,a){b=b.toLowerCase();a=a.toLowerCase();return b<a?-1:b>a?1:0}}})(jQuery);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
746 (function($) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
747
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
748 $.fn.stupidtable = function(sortFns) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
749 return this.each(function() {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
750 var $table = $(this);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
751 sortFns = sortFns || {};
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
752
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
753 // ==================================================== //
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
754 // Utility functions //
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
755 // ==================================================== //
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
756
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
757 // Merge sort functions with some default sort functions.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
758 sortFns = $.extend({}, $.fn.stupidtable.default_sort_fns, sortFns);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
759
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
760 // Return the resulting indexes of a sort so we can apply
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
761 // this result elsewhere. This returns an array of index numbers.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
762 // return[0] = x means "arr's 0th element is now at x"
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
763 var sort_map = function(arr, sort_function) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
764 var map = [];
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
765 var index = 0;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
766 var sorted = arr.slice(0).sort(sort_function);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
767 for (var i=0; i<arr.length; i++) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
768 index = $.inArray(arr[i], sorted);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
769
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
770 // If this index is already in the map, look for the next index.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
771 // This handles the case of duplicate entries.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
772 while ($.inArray(index, map) != -1) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
773 index++;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
774 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
775 map.push(index);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
776 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
777
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
778 return map;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
779 };
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
780
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
781 // Apply a sort map to the array.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
782 var apply_sort_map = function(arr, map) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
783 var clone = arr.slice(0),
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
784 newIndex = 0;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
785 for (var i=0; i<map.length; i++) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
786 newIndex = map[i];
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
787 clone[newIndex] = arr[i];
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
788 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
789 return clone;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
790 };
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
791
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
792 // ==================================================== //
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
793 // Begin execution! //
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
794 // ==================================================== //
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
795
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
796 // Do sorting when THs are clicked
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
797 $table.on("click", "th", function() {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
798 var trs = $table.children("tbody").children("tr");
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
799 var $this = $(this);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
800 var th_index = 0;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
801 var dir = $.fn.stupidtable.dir;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
802
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
803 $table.find("th").slice(0, $this.index()).each(function() {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
804 var cols = $(this).attr("colspan") || 1;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
805 th_index += parseInt(cols,10);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
806 });
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
807
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
808 // Determine (and/or reverse) sorting direction, default `asc`
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
809 var sort_dir = $this.data("sort-default") || dir.ASC;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
810 if ($this.data("sort-dir"))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
811 sort_dir = $this.data("sort-dir") === dir.ASC ? dir.DESC : dir.ASC;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
812
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
813 // Choose appropriate sorting function.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
814 var type = $this.data("sort") || null;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
815
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
816 // Prevent sorting if no type defined
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
817 if (type === null) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
818 return;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
819 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
820
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
821 // Trigger `beforetablesort` event that calling scripts can hook into;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
822 // pass parameters for sorted column index and sorting direction
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
823 $table.trigger("beforetablesort", {column: th_index, direction: sort_dir});
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
824 // More reliable method of forcing a redraw
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
825 $table.css("display");
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
826
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
827 // Run sorting asynchronously on a timout to force browser redraw after
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
828 // `beforetablesort` callback. Also avoids locking up the browser too much.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
829 setTimeout(function() {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
830 // Gather the elements for this column
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
831 var column = [];
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
832 var sortMethod = sortFns[type];
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
833
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
834 // Push either the value of the `data-order-by` attribute if specified
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
835 // or just the text() value in this column to column[] for comparison.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
836 trs.each(function(index,tr) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
837 var $e = $(tr).children().eq(th_index);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
838 var sort_val = $e.data("sort-value");
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
839 var order_by = typeof(sort_val) !== "undefined" ? sort_val : $e.text();
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
840 column.push(order_by);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
841 });
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
842
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
843 // Create the sort map. This column having a sort-dir implies it was
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
844 // the last column sorted. As long as no data-sort-desc is specified,
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
845 // we're free to just reverse the column.
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
846 var theMap;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
847 if (sort_dir == dir.ASC)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
848 theMap = sort_map(column, sortMethod);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
849 else
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
850 theMap = sort_map(column, function(a, b) { return -sortMethod(a, b); });
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
851
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
852 // Reset siblings
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
853 $table.find("th").data("sort-dir", null).removeClass("sorting-desc sorting-asc");
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
854 $this.data("sort-dir", sort_dir).addClass("sorting-"+sort_dir);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
855
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
856 var sortedTRs = $(apply_sort_map(trs, theMap));
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
857 $table.children("tbody").remove();
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
858 $table.append("<tbody />").append(sortedTRs);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
859
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
860 // Trigger `aftertablesort` event. Similar to `beforetablesort`
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
861 $table.trigger("aftertablesort", {column: th_index, direction: sort_dir});
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
862 // More reliable method of forcing a redraw
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
863 $table.css("display");
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
864 }, 10);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
865 });
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
866 });
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
867 };
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
868
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
869 // Enum containing sorting directions
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
870 $.fn.stupidtable.dir = {ASC: "asc", DESC: "desc"};
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
871
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
872 $.fn.stupidtable.default_sort_fns = {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
873 "int": function(a, b) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
874 return parseInt(a, 10) - parseInt(b, 10);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
875 },
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
876 "float": function(a, b) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
877 return parseFloat(a) - parseFloat(b);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
878 },
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
879 "string": function(a, b) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
880 if (a < b) return -1;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
881 if (a > b) return +1;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
882 return 0;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
883 },
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
884 "string-ins": function(a, b) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
885 a = a.toLowerCase();
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
886 b = b.toLowerCase();
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
887 if (a < b) return -1;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
888 if (a > b) return +1;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
889 return 0;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
890 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
891 };
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
892
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
893 })(jQuery);
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
894 $(function(){
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
895 var table = $("table").stupidtable();
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
896
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
897 table.on("beforetablesort", function (event, data) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
898 // data.column - the index of the column sorted after a click
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
899 // data.direction - the sorting direction (either asc or desc)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
900 $("#msg").text("Sorting index " + data.column)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
901 });
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
902 table.on("aftertablesort", function (event, data) {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
903 var th = $(this).find("th");
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
904 th.find(".arrow").remove();
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
905 var dir = $.fn.stupidtable.dir;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
906 var arrow = data.direction === dir.ASC ? "&uarr;" : "&darr;";
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
907 th.eq(data.column).append('<span class="arrow">' + arrow +'</span>');
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
908 });
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
909 });
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
910 </script>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
911 <style type="text/css">
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
912 label {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
913 display: inline-block;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
914 width: 5em;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
915 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
916 table {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
917 border-collapse: collapse;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
918 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
919 th, td {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
920 padding: 5px 10px;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
921 border: 1px solid #999;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
922 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
923 th {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
924 background-color: #a7d3ff;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
925 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
926 th[data-sort]{
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
927 cursor:pointer;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
928 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
929 a[data-lightbox]{
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
930 cursor:zoom-in;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
931 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
932 #msg {
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
933 color: green;
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
934 }
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
935 </style>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
936 </head>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
937 <body>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
938 <h1>Readthrough analyse results</h1>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
939 %s
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
940 </body>
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
941 </html> """ % (gene_table)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
942
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
943 html_file = open(html,"w")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
944 html_file.write(html_str)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
945 html_file.close()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
946
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
947
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
948 except Exception, e :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
949 stop_err('Error during html page creation : ' + str( e ) )
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
950
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
951
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
952 def __main__():
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
953
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
954 #Parse command line options
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
955 parser = optparse.OptionParser()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
956 parser.add_option("-g", "--gff", dest="gff", type= "string",
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
957 help="GFF annotation file", metavar="FILE")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
958
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
959 parser.add_option("-f", "--fasta", dest="fasta", type= "string",
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
960 help="Fasta file ", metavar="FILE")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
961
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
962 parser.add_option("-b", "--bam", dest="bamfile", type= "string",
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
963 help="Bam Ribo-Seq alignments ", metavar="FILE")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
964
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
965 parser.add_option("-d", "--dirout", dest="dirout", type= "string",
10
rlegendre
parents: 6
diff changeset
966 help="write report in this html file and in associated directory", metavar="STR,STR")
rlegendre
parents: 6
diff changeset
967
rlegendre
parents: 6
diff changeset
968 parser.add_option("-e", "--extend", dest="extend", type= "int",
rlegendre
parents: 6
diff changeset
969 help="Lenght of extension after stop in number of base pairs (depends on your organisme)", metavar="INT")
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
970
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
971 parser.add_option("-q", "--quiet",
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
972 action="store_false", dest="verbose", default=True,
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
973 help="don't print status messages to stdout")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
974
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
975 (options, args) = parser.parse_args()
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
976 sys.stdout.write("Begin readthrough analysis at %s\n" % time.asctime( time.localtime(time.time())))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
977
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
978 try:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
979 (html_file, subfolder ) = options.dirout.split(",")
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
980 if os.path.exists(subfolder):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
981 raise
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
982 try:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
983 os.mkdir(subfolder)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
984 except:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
985 raise
10
rlegendre
parents: 6
diff changeset
986 ## identify GFF or GTF format from 9th column
rlegendre
parents: 6
diff changeset
987 with open (options.gff,"r") as gffile :
rlegendre
parents: 6
diff changeset
988 for line in gffile :
rlegendre
parents: 6
diff changeset
989 if '#' in line :
rlegendre
parents: 6
diff changeset
990 ## skip header
rlegendre
parents: 6
diff changeset
991 gffile.next()
rlegendre
parents: 6
diff changeset
992 elif 'gene_id' in line :
rlegendre
parents: 6
diff changeset
993 ## launch gtf reader :
rlegendre
parents: 6
diff changeset
994 GFF = ribo_functions.store_gtf(options.gff)
rlegendre
parents: 6
diff changeset
995 break
rlegendre
parents: 6
diff changeset
996 elif 'ID=' in line :
rlegendre
parents: 6
diff changeset
997 ## launch gff reader
rlegendre
parents: 6
diff changeset
998 GFF = ribo_functions.store_gff(options.gff)
rlegendre
parents: 6
diff changeset
999 break
rlegendre
parents: 6
diff changeset
1000 else :
rlegendre
parents: 6
diff changeset
1001 stop_err( 'Please check your annotation file is in correct format, GFF or GTF' )
rlegendre
parents: 6
diff changeset
1002
rlegendre
parents: 6
diff changeset
1003 #GFF = store_gff(options.gff)
rlegendre
parents: 6
diff changeset
1004 #GFF = ribo_functions.store_gtf(options.gff)
rlegendre
parents: 6
diff changeset
1005 ## check gff reading
rlegendre
parents: 6
diff changeset
1006 if not GFF['order'] :
rlegendre
parents: 6
diff changeset
1007 stop_err( 'Incorrect GFF file' + str( e ) )
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1008 clean_file = cleaning_bam(options.bamfile)
10
rlegendre
parents: 6
diff changeset
1009 compute_analysis(clean_file, GFF, options.fasta, options.gff, subfolder, options.extend)
6
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1010 if os.path.exists( clean_file ):
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1011 os.remove( clean_file )
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1012
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1013 write_html_page(html_file,subfolder)
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1014 ##paste jquery script in result directory :
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1015 jq_src = os.path.join(os.path.dirname(__file__),'lightbox')
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1016 shutil.copytree(jq_src,os.path.join(subfolder,'lightbox'))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1017
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1018
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1019 sys.stdout.write("Finish readthrough analysis at %s\n" % time.asctime( time.localtime(time.time())))
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1020 except Exception, e:
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1021 stop_err( 'Error running metagene readthrough analysis : ' + str( e ) )
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1022
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1023
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1024 if __name__=="__main__":
29c9c86e17e1 Uploaded
rlegendre
parents:
diff changeset
1025 __main__()