annotate get_codon_frequency.py @ 10:707807fee542

(none)
author rlegendre
date Thu, 22 Jan 2015 14:34:38 +0100
parents b8c070add3b7
children 7c944fd9907e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
1 #!/usr/bin/env python2.7
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
2 # -*- coding: utf-8 -*-
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
3
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
4 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
5 Created on sep. 2013
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
6 @author: rachel legendre
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
7 @copyright: rachel.legendre@igmors.u-psud.fr
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
8 @license: GPL v3
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
9 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
10
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
11 from __future__ import division
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
12 import os, sys, optparse, tempfile, subprocess, re, shutil, commands, urllib, time
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
13 import itertools
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
14 import math
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
15 from decimal import Decimal
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
16 from Bio import SeqIO
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
17 from Bio.Seq import Seq
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
18 from numpy import arange, std, array, linspace, average
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
19 #from matplotlib import pyplot as pl
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
20 import matplotlib
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
21 matplotlib.use('Agg')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
22 import matplotlib.pyplot as pl
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
23 from matplotlib import font_manager
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
24 from matplotlib import colors
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
25 import csv
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
26 from scipy import stats
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
27 from collections import OrderedDict
10
rlegendre
parents: 0
diff changeset
28 import ribo_functions
rlegendre
parents: 0
diff changeset
29 import HTSeq
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
30 # #libraries for debugg
10
rlegendre
parents: 0
diff changeset
31 import pdb
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
32 # import cPickle
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
33
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
34 def stop_err(msg):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
35 sys.stderr.write("%s\n" % msg)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
36 sys.stderr.write("Programme aborted at %s\n" % time.asctime(time.localtime(time.time())))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
37 sys.exit()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
38
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
39 def store_gff(gff):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
40 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
41 parse and store gff file in a dictionnary
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
42 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
43 try:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
44 GFF = OrderedDict({})
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
45 with open(gff, 'r') as f_gff :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
46 # GFF['order'] = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
47 for line in f_gff:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
48 # # switch commented lines
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
49 head = line.split("#")[0]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
50 if head != "" :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
51 feature = (line.split('\t')[8]).split(';')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
52 chrom = line.split('\t')[0]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
53 if chrom not in GFF :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
54 GFF[chrom] = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
55 # first line is already gene line :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
56 if line.split('\t')[2] == 'gene' :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
57 gene = feature[0].replace("ID=", "")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
58 if re.search('gene', feature[2]) :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
59 Name = feature[2].replace("gene=", "")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
60 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
61 Name = "Unknown"
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
62 # #get annotation
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
63 note = re.sub(r".+\;Note\=(.+)\;display\=.+", r"\1", line)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
64 note = urllib.unquote(str(note)).replace("\n", "")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
65 # # store gene information
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
66 # GFF['order'].append(gene)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
67 GFF[chrom][gene] = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
68 GFF[chrom][gene]['chrom'] = line.split('\t')[0]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
69 GFF[chrom][gene]['start'] = int(line.split('\t')[3])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
70 GFF[chrom][gene]['stop'] = int(line.split('\t')[4])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
71 GFF[chrom][gene]['strand'] = line.split('\t')[6]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
72 GFF[chrom][gene]['name'] = Name
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
73 GFF[chrom][gene]['note'] = note
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
74 GFF[chrom][gene]['exon'] = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
75 GFF[chrom][gene]['exon_number'] = 0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
76 # print Name
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
77 elif line.split('\t')[2] == 'CDS' :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
78 gene = re.sub(r"Parent\=(.+)_mRNA", r"\1", feature[0])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
79 if GFF[chrom].has_key(gene) :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
80 GFF[chrom][gene]['exon_number'] += 1
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
81 exon_number = GFF[chrom][gene]['exon_number']
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
82 GFF[chrom][gene]['exon'][exon_number] = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
83 GFF[chrom][gene]['exon'][exon_number]['frame'] = line.split('\t')[7]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
84 GFF[chrom][gene]['exon'][exon_number]['start'] = int(line.split('\t')[3])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
85 GFF[chrom][gene]['exon'][exon_number]['stop'] = int(line.split('\t')[4])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
86 # # if there is a five prim UTR intron, we change start of gene
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
87 elif line.split('\t')[2] == 'five_prime_UTR_intron' :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
88 if GFF[chrom][gene]['strand'] == "+" :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
89 GFF[chrom][gene]['start'] = GFF[chrom][gene]['exon'][1]['start']
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
90 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
91 GFF[chrom][gene]['stop'] = GFF[chrom][gene]['exon'][exon_number]['stop']
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
92 return GFF
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
93 except Exception, e:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
94 stop_err('Error during gff storage : ' + str(e))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
95
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
96 #chrI SGD gene 87286 87752 . + . ID=YAL030W;Name=YAL030W;gene=SNC1;Alias=SNC1;Ontology_term=GO:0005484,GO:0005768,GO:0005802,GO:0005886,GO:0005935,GO:0006887,GO:0006893,GO:000689
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
97 #7,GO:0006906,GO:0030658,GO:0031201;Note=Vesicle%20membrane%20receptor%20protein%20%28v-SNARE%29%3B%20involved%20in%20the%20fusion%20between%20Golgi-derived%20secretory%20vesicles%20with%20the%20plasma%20membra
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
98 #ne%3B%20proposed%20to%20be%20involved%20in%20endocytosis%3B%20member%20of%20the%20synaptobrevin%2FVAMP%20family%20of%20R-type%20v-SNARE%20proteins%3B%20SNC1%20has%20a%20paralog%2C%20SNC2%2C%20that%20arose%20fr
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
99 #om%20the%20whole%20genome%20duplication;display=Vesicle%20membrane%20receptor%20protein%20%28v-SNARE%29;dbxref=SGD:S000000028;orf_classification=Verified
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
100 #chrI SGD CDS 87286 87387 . + 0 Parent=YAL030W_mRNA;Name=YAL030W_CDS;orf_classification=Verified
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
101 #chrI SGD CDS 87501 87752 . + 0 Parent=YAL030W_mRNA;Name=YAL030W_CDS;orf_classification=Verified
10
rlegendre
parents: 0
diff changeset
102
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
103
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
104 def init_codon_dict():
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
105
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
106 Codon_dict = OrderedDict([('AAA', 0), ('AAC', 0), ('AAG', 0), ('AAT', 0), ('ACA', 0), ('ACC', 0), ('ACG', 0), ('ACT', 0), ('AGA', 0), ('AGC', 0), ('AGG', 0), ('AGT', 0), ('ATA', 0), ('ATC', 0), ('ATG', 0), ('ATT', 0), ('CAA', 0), ('CAC', 0), ('CAG', 0), ('CAT', 0), ('CCA', 0), ('CCC', 0), ('CCG', 0), ('CCT', 0), ('CGA', 0), ('CGC', 0), ('CGG', 0), ('CGT', 0), ('CTA', 0), ('CTC', 0), ('CTG', 0), ('CTT', 0), ('GAA', 0), ('GAC', 0), ('GAG', 0), ('GAT', 0), ('GCA', 0), ('GCC', 0), ('GCG', 0), ('GCT', 0), ('GGA', 0), ('GGC', 0), ('GGG', 0), ('GGT', 0), ('GTA', 0), ('GTC', 0), ('GTG', 0), ('GTT', 0), ('TAA', 0), ('TAC', 0), ('TAG', 0), ('TAT', 0), ('TCA', 0), ('TCC', 0), ('TCG', 0), ('TCT', 0), ('TGA', 0), ('TGC', 0), ('TGG', 0), ('TGT', 0), ('TTA', 0), ('TTC', 0), ('TTG', 0), ('TTT', 0)])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
107 return Codon_dict
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
108
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
109
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
110
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
111 def get_codon_usage(bamfile, GFF, site, kmer, a_pos):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
112 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
113 Read GFF dict and get gene codon usage.
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
114 Return dict of codons usage
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
115 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
116 try:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
117 codon = init_codon_dict()
10
rlegendre
parents: 0
diff changeset
118 for feature in GFF :
rlegendre
parents: 0
diff changeset
119 if feature.type == 'gene' :
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
120 codon_dict = init_codon_dict()
10
rlegendre
parents: 0
diff changeset
121 chrom = feature.iv.chrom
rlegendre
parents: 0
diff changeset
122 start = feature.iv.start
rlegendre
parents: 0
diff changeset
123 stop = feature.iv.end
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
124 region = chrom + ':' + str(start) + '-' + str(stop)
10
rlegendre
parents: 0
diff changeset
125
rlegendre
parents: 0
diff changeset
126 ## DEPRECATED
rlegendre
parents: 0
diff changeset
127 #for chrom in GFF.iterkeys():
rlegendre
parents: 0
diff changeset
128 #for gene in GFF[chrom] :
rlegendre
parents: 0
diff changeset
129 # codon_dict = init_codon_dict()
rlegendre
parents: 0
diff changeset
130 #start = GFF[chrom][gene]['start']
rlegendre
parents: 0
diff changeset
131 #print start
rlegendre
parents: 0
diff changeset
132 #stop = GFF[chrom][gene]['stop']
rlegendre
parents: 0
diff changeset
133 #print stop
rlegendre
parents: 0
diff changeset
134 #region = chrom + ':' + str(start) + '-' + str(stop)
rlegendre
parents: 0
diff changeset
135 #######
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
136 # #get all reads in this gene
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
137 reads = subprocess.check_output(["samtools", "view", bamfile, region])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
138 head = subprocess.check_output(["samtools", "view", "-H", bamfile])
10
rlegendre
parents: 0
diff changeset
139 read_tab = reads.split('\n')
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
140 for read in read_tab:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
141 # # search mapper for eliminate multiple alignements
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
142 if 'bowtie' in head:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
143 multi_tag = "XS:i:"
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
144 elif 'bwa' in head:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
145 multi_tag = "XT:A:R"
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
146 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
147 stop_err("No PG tag find in"+samfile+". Please use bowtie or bwa for mapping")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
148 if len(read) == 0:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
149 continue
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
150 len_read = len(read.split('\t')[9])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
151 # if it's read of good length
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
152 if len_read == kmer and multi_tag not in read:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
153 feat = read.split('\t')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
154 seq = feat[9]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
155 # if it's a reverse read
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
156 if feat[1] == '16' :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
157 if site == "A" :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
158 # #get A-site
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
159 cod = str(Seq(seq[a_pos-5:a_pos-2]).reverse_complement())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
160 elif site == "P" :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
161 # #get P-site
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
162 cod = str(Seq(seq[a_pos-2:a_pos+1]).reverse_complement())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
163 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
164 # #get site-E
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
165 cod = str(Seq(seq[a_pos+1:a_pos+4]).reverse_complement())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
166 # # test if it's a true codon not a CNG codon for example
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
167 if codon_dict.has_key(cod) :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
168 codon_dict[cod] += 1
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
169 # if it's a forward read
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
170 elif feat[1] == '0' :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
171 if site == "A" :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
172 # #get A-site
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
173 cod = seq[a_pos:a_pos+3]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
174 elif site == "P" :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
175 # #get P-site
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
176 cod = seq[a_pos-3:a_pos]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
177 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
178 # #get site-E
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
179 cod = seq[a_pos-6:a_pos-3]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
180 if codon_dict.has_key(cod) :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
181 codon_dict[cod] += 1
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
182 # # add in global dict
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
183 for cod, count in codon_dict.iteritems() :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
184 codon[cod] += count
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
185
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
186 return codon
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
187
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
188 except Exception, e:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
189 stop_err('Error during codon usage calcul: ' + str(e))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
190
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
191
10
rlegendre
parents: 0
diff changeset
192
rlegendre
parents: 0
diff changeset
193
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
194 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
195 http://pyinsci.blogspot.fr/2009/09/violin-plot-with-matplotlib.html
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
196 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
197 def violin_plot(ax, data, pos, bp=False):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
198 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
199 create violin plots on an axis
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
200 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
201 dist = max(pos) - min(pos)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
202 w = min(0.15 * max(dist, 1.0), 0.5)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
203 for d, p in zip(data, pos):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
204 k = stats.gaussian_kde(d) # calculates the kernel density
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
205 m = k.dataset.min() # lower bound of violin
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
206 M = k.dataset.max() # upper bound of violin
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
207 x = arange(m, M, (M - m) / 100.) # support for violin
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
208 v = k.evaluate(x) # violin profile (density curve)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
209 v = v / v.max() * w # scaling the violin to the available space
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
210 ax.fill_betweenx(x, p, v + p, facecolor=color1, alpha=0.3)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
211 ax.fill_betweenx(x, p, -v + p, facecolor=color2, alpha=0.3)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
212 if bp:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
213 ax.boxplot(data, notch=1, positions=pos, vert=1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
214
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
215
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
216
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
217 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
218 http://log.ooz.ie/2013/02/matplotlib-comparative-histogram-recipe.html
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
219 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
220 def comphist(x1, x2, orientation='vertical', **kwargs):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
221 """Draw a comparative histogram."""
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
222 # Split keyword args:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
223 kwargs1 = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
224 kwargs2 = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
225 kwcommon = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
226 for arg in kwargs:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
227 tgt_arg = arg[:-1]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
228 if arg.endswith('1'):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
229 arg_dict = kwargs1
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
230 elif arg.endswith('2'):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
231 arg_dict = kwargs2
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
232 else:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
233 arg_dict = kwcommon
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
234 tgt_arg = arg
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
235 arg_dict[tgt_arg] = kwargs[arg]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
236 kwargs1.update(kwcommon)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
237 kwargs2.update(kwcommon)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
238
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
239 fig = pl.figure()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
240
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
241 # Have both histograms share one axis.
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
242 if orientation == 'vertical':
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
243 ax1 = pl.subplot(211)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
244 ax2 = pl.subplot(212, sharex=ax1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
245 # Flip the ax2 histogram horizontally.
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
246 ax2.set_ylim(ax1.get_ylim()[::-1])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
247 pl.setp(ax1.get_xticklabels(), visible=False)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
248 legend_loc = (1, 4)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
249 else:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
250 ax1 = pl.subplot(122)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
251 ax2 = pl.subplot(121, sharey=ax1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
252 # Flip the ax2 histogram vertically.
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
253 ax2.set_xlim(ax2.get_xlim()[::-1])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
254 pl.setp(ax1.get_yticklabels(), visible=False)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
255 legend_loc = (1, 2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
256
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
257 ax1.hist(x1, orientation=orientation, **kwargs1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
258 ax2.hist(x2, orientation=orientation, **kwargs2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
259 ax2.set_ylim(ax1.get_ylim()[::-1])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
260 ax1.legend(loc=legend_loc[0])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
261 ax2.legend(loc=legend_loc[1])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
262 # Tighten up the layout.
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
263 pl.subplots_adjust(wspace=0.0, hspace=0.0)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
264 return fig
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
265
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
266
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
267 def compute_FC_plot(cond1_norm, cond2_norm, cod_name, codon_to_test, dirout):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
268
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
269 FC_tab = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
270 for z, y in zip(cond1_norm.itervalues(), cond2_norm.itervalues()):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
271 fc = z - y
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
272 FC_tab.append(fc)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
273 # #codon_to_test = ['TGA','TAG','TAA']
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
274
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
275 a = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
276 b = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
277 cod = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
278 for codon in cond1_norm.iterkeys():
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
279 if codon in codon_to_test :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
280 fc = cond1_norm[codon] - cond2_norm[codon]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
281 b.append(fc)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
282 cod.append(codon)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
283 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
284 fc = cond1_norm[codon] - cond2_norm[codon]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
285 a.append(fc)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
286
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
287
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
288 fig = pl.figure(num=1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
289 comphist(array(a), array(b), label1='All codon', label2=cod_name, color2='green', bins=30, rwidth=1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
290 # pl.show()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
291 pl.savefig(dirout + '/hist_codon_fc.png', format="png", dpi=340)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
292 pl.clf()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
293
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
294
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
295 # #violin plot
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
296 pos = range(2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
297 dat = array([array(a), array(b)])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
298 fig = pl.figure()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
299 pl.title("Distribution of codons FoldChange between two conditions")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
300 ax = fig.add_subplot(1, 1, 1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
301 lab = array(['All codons', cod_name])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
302 violin_plot(ax, dat, pos, bp=1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
303 for x, z in zip(dat, pos):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
304 ax.plot(z, average(x), color='r', marker='*', markeredgecolor='r')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
305 xtickNames = pl.setp(ax, xticklabels=lab)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
306 pl.savefig(dirout + '/violinplot_codon.png', format="png", dpi=340)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
307 pl.clf()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
308
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
309 # (Fval,pval) = stats.ttest_ind(a, b, axis=0, equal_var=True)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
310 (Fval, pval) = stats.mannwhitneyu(a, b)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
311 return pval
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
312
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
313
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
314 def get_aa_dict(cond1_norm, cond2_norm):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
315
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
316 # ## create amino acid dictionnary:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
317 AA = OrderedDict({})
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
318 AA['Phe'] = [cond1_norm['TTT'] + cond1_norm['TTC'], cond2_norm['TTT'] + cond2_norm['TTC']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
319 AA['Leu'] = [cond1_norm['TTA'] + cond1_norm['TTG'] + cond1_norm['CTT'] + cond1_norm['CTC'] + cond1_norm['CTA'] + cond1_norm['CTG'], cond2_norm['TTA'] + cond2_norm['TTG'] + cond2_norm['CTT'] + cond2_norm['CTC'] + cond2_norm['CTA'] + cond2_norm['CTG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
320 AA['Ile'] = [cond1_norm['ATT'] + cond1_norm['ATC'] + cond1_norm['ATA'], cond2_norm['ATT'] + cond2_norm['ATC'] + cond2_norm['ATA']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
321 AA['Met'] = [cond1_norm['ATG'], cond2_norm['ATG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
322 AA['Val'] = [cond1_norm['GTT'] + cond1_norm['GTC'] + cond1_norm['GTA'] + cond1_norm['GTG'] + cond1_norm['AGT'] + cond1_norm['AGC'], cond2_norm['GTT'] + cond2_norm['GTC'] + cond2_norm['GTA'] + cond2_norm['GTG'] + cond2_norm['AGT'] + cond2_norm['AGC']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
323 AA['Ser'] = [cond1_norm['TCT'] + cond1_norm['TCC'] + cond1_norm['TCA'] + cond1_norm['TCG'], cond2_norm['TCT'] + cond2_norm['TCC'] + cond2_norm['TCA'] + cond2_norm['TCG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
324 AA['Pro'] = [cond1_norm['CCT'] + cond1_norm['CCC'] + cond1_norm['CCA'] + cond1_norm['CCG'], cond2_norm['CCT'] + cond2_norm['CCC'] + cond2_norm['CCA'] + cond2_norm['CCG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
325 AA['Thr'] = [cond1_norm['ACT'] + cond1_norm['ACC'] + cond1_norm['ACA'] + cond1_norm['ACG'], cond2_norm['ACT'] + cond2_norm['ACC'] + cond2_norm['ACA'] + cond2_norm['ACG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
326 AA['Ala'] = [cond1_norm['GCT'] + cond1_norm['GCC'] + cond1_norm['GCA'] + cond1_norm['GCG'], cond2_norm['GCT'] + cond2_norm['GCC'] + cond2_norm['GCA'] + cond2_norm['GCG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
327 AA['Tyr'] = [cond1_norm['TAT'] + cond1_norm['TAC'], cond2_norm['TAT'] + cond2_norm['TAC']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
328 AA['Stop'] = [cond1_norm['TAA'] + cond1_norm['TAG'] + cond1_norm['TGA'], cond2_norm['TAA'] + cond2_norm['TAG'] + cond2_norm['TGA']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
329 AA['His'] = [cond1_norm['CAT'] + cond1_norm['CAC'], cond2_norm['CAT'] + cond2_norm['CAC']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
330 AA['Gln'] = [cond1_norm['CAA'] + cond1_norm['CAG'], cond2_norm['CAA'] + cond2_norm['CAG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
331 AA['Asn'] = [cond1_norm['AAT'] + cond1_norm['AAC'], cond2_norm['AAT'] + cond2_norm['AAC']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
332 AA['Lys'] = [cond1_norm['AAA'] + cond1_norm['AAG'], cond2_norm['AAA'] + cond2_norm['AAG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
333 AA['Asp'] = [cond1_norm['GAT'] + cond1_norm['GAC'], cond2_norm['GAT'] + cond2_norm['GAC']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
334 AA['Glu'] = [cond1_norm['GAA'] + cond1_norm['GAG'], cond2_norm['GAA'] + cond2_norm['GAG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
335 AA['Cys'] = [cond1_norm['TGT'] + cond1_norm['TGC'], cond2_norm['TGT'] + cond2_norm['TGC']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
336 AA['Trp'] = [cond1_norm['TGG'], cond2_norm['TGG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
337 AA['Arg'] = [cond1_norm['CGT'] + cond1_norm['CGC'] + cond1_norm['CGA'] + cond1_norm['CGG'] + cond1_norm['AGA'] + cond1_norm['AGG'], cond2_norm['CGT'] + cond2_norm['CGC'] + cond2_norm['CGA'] + cond2_norm['CGG'] + cond2_norm['AGA'] + cond2_norm['AGG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
338 AA['Gly'] = [cond1_norm['GGT'] + cond1_norm['GGC'] + cond1_norm['GGA'] + cond1_norm['GGG'], cond2_norm['GGT'] + cond2_norm['GGC'] + cond2_norm['GGA'] + cond2_norm['GGG']]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
339
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
340
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
341 return AA
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
342
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
343
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
344
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
345 def plot_codon_usage(result, dirout, c1, c2, outfile, color1, color2):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
346 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
347 Take list of dict of codon usage and use matplotlib for do graph
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
348 '''
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
349
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
350 # #if there are replicat
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
351 if len(result) == 4 :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
352 # store each dict in variables to make code more readable
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
353 cond1_1 = result[0].copy()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
354 cond1_2 = result[1].copy()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
355 cond2_1 = result[2].copy()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
356 cond2_2 = result[3].copy()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
357 # get codon order in one of list
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
358 codon_sorted = sorted(cond1_1.iterkeys(), reverse=False)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
359 # get max of each list
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
360 sum11 = sum(list(cond1_1.itervalues()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
361 sum12 = sum(list(cond1_2.itervalues()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
362 sum21 = sum(list(cond2_1.itervalues()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
363 sum22 = sum(list(cond2_2.itervalues()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
364 # for each codon, get values and sd in each condition
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
365 cond1_val = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
366 cond1 = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
367 cond2_val = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
368 cond2 = {}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
369 std_cond1 = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
370 std_cond2 = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
371 max_val = [] # # max value for graph
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
372 for i in codon_sorted:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
373 # # cond1 = moyenne of replicats cond1 divided by max
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
374 cond1_val[i] = ((cond1_1[i] / sum11 + cond1_2[i] / sum12) / 2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
375 cond1[i] = ((cond1_1[i] + cond1_2[i]) / 2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
376 # # standard deviation = absolute value of diffence between replicats of cond1
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
377 std_cond1.append(std(array([(cond1_1[i] * 100 / sum11), (cond1_2[i] * 100 / sum12)])))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
378 # # cond2 = moyenne of replicats cond1divided by max
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
379 cond2_val[i] = ((cond2_1[i] / sum21 + cond2_2[i] / sum22) / 2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
380 cond2[i] = ((cond2_1[i] + cond2_2[i]) / 2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
381 # # standard deviation = absolute value of diffence between replicats of cond2
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
382 std_cond2.append(std(array([((cond2_1[i]) * 100 / sum21), ((cond2_2[i]) * 100 / sum22)])))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
383 # # max value for each codon
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
384 max_val.append(max((cond1_1[i] / sum11 + cond1_2[i] / sum12) / 2, (cond2_1[i] / sum21 + cond2_2[i] / sum22) / 2))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
385
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
386 # for graph design
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
387 cond1_norm = OrderedDict(sorted(cond1_val.items(), key=lambda t: t[0]))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
388 cond1_norm.update ((x, y * 100) for x, y in cond1_norm.items())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
389 cond2_norm = OrderedDict(sorted(cond2_val.items(), key=lambda t: t[0]))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
390 cond2_norm.update ((x, y * 100) for x, y in cond2_norm.items())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
391 max_val = [x * 100 for x in max_val]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
392
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
393 AA = get_aa_dict(cond1_norm, cond2_norm)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
394 max_valaa = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
395 cond1_aa = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
396 cond2_aa = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
397 aa_name = list(AA.iterkeys())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
398 for z in AA.itervalues():
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
399 cond1_aa.append(z[0])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
400 cond2_aa.append(z[1])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
401 max_valaa.append(max(z))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
402 # # plot amino acid profile :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
403 fig = pl.figure(figsize=(24, 10), num=1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
404 width = .50
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
405 ax = fig.add_subplot(111)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
406 ax.xaxis.set_ticks([])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
407 ind = arange(21)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
408 pl.xlim(0, 21)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
409 ax.bar(ind, cond1_aa, width, facecolor=color1, label=c1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
410 ax.bar(ind + width, cond2_aa, width, facecolor=color2, label=c2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
411 for x, y, z in zip(ind, max_valaa, aa_name):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
412 ax.text(x + width, y + 0.2, '%s' % z, ha='center', va='bottom', fontsize=14)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
413 ax.set_ylabel('Ribosome Occupancy (percent of normalized reads)')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
414 ax.set_xlabel('Amino Acid')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
415 handles, labels = ax.get_legend_handles_labels()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
416 ax.legend(handles, labels)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
417 pl.savefig(dirout + '/hist_amino_acid.png', format="png", dpi=340)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
418 pl.clf()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
419
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
420
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
421 # # compute theorical count in COND2
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
422 sum2 = (sum21 + sum22) / 2
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
423 cond2_count = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
424 for z in cond1_norm.itervalues() :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
425 count = int(z * sum2 / 100)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
426 cond2_count.append(count)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
427
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
428 expected = array(cond2_count)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
429 observed = array(list(cond2.itervalues()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
430
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
431 # write result
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
432 with open(outfile, 'w') as out :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
433 out.write('Codon\tRaw_' + c1 + '\tRaw_' + c2 + '\tNorm_' + c1 + '\tNorm_' + c2 + '\tFC\tFC_' + c1 + '\tFC_' + c2 + '\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
434 for i in codon_sorted:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
435 out.write(i + '\t' + str(cond1[i]) + '\t' + str(cond2[i]) + '\t' + str(cond1_norm[i]) + '\t' + str(cond2_norm[i]) + '\t' + str(cond2_norm[i] / cond1_norm[i]) + '\t' + str((cond2_1[i] / sum21) / (cond1_1[i] / sum11)) + '\t' + str((cond2_2[i] / sum22) / (cond1_1[i] / sum11)) + '\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
436 chi = stats.chisquare(observed, expected)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
437 out.write('Khi2 test\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
438 out.write('T : ' + str(chi[0]) + '; p-value : ' + str(chi[1]) + '\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
439
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
440
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
441
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
442 # plot result
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
443 fig = pl.figure(figsize=(24, 10), num=1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
444 width = .50
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
445 ind = arange(len(codon_sorted))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
446 ax = fig.add_subplot(111)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
447 pl.xlim(0, len(codon_sorted) + 1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
448 ax.spines['right'].set_color('none')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
449 ax.spines['top'].set_color('none')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
450 ax.xaxis.set_ticks([])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
451 ax.spines['left'].set_smart_bounds(True)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
452 ax.yaxis.set_ticks_position('left')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
453 ax.bar(ind, list(cond1_norm.itervalues()), width, facecolor=color1, yerr=std_cond1, error_kw={'elinewidth':1, 'ecolor':'black'}, label=c1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
454 ax.bar(ind + width, list(cond2_norm.itervalues()), width, yerr=std_cond2, facecolor=color2, error_kw={'elinewidth':1, 'ecolor':'black'}, label=c2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
455 for x, y, z in zip(ind, max_val, codon_sorted):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
456 ax.text(x + width, y + 0.2, '%s' % z, ha='center', va='bottom', fontsize=8)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
457 ax.set_ylabel('Ribosome Occupancy (percent of normalized reads)')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
458 ax.set_xlabel('Codons')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
459 handles, labels = ax.get_legend_handles_labels()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
460 ax.legend(handles, labels)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
461 pl.savefig(dirout + '/hist_codons.png', format="png", dpi=340)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
462 pl.clf()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
463
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
464
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
465
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
466 elif len(result) == 2 :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
467
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
468 # store each dict in OrderedDict sorted by key to make code more readable
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
469 cond1 = result[0]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
470 cond2 = result[1]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
471 cond1_norm = result[0].copy()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
472 cond2_norm = result[1].copy()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
473 # pdb.set_trace()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
474 # get codon order in one of list
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
475 codon_sorted = sorted(cond1.iterkeys(), reverse=False)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
476
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
477 # get sum of each list
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
478 sum1 = sum(list(cond1.itervalues()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
479 sum2 = sum(list(cond2.itervalues()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
480 # #Normalize values by sum of each libraries
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
481 cond1_norm.update ((x, (y / sum1) * 100.0) for x, y in cond1_norm.items())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
482 cond2_norm.update((x, (y / sum2) * 100.0) for x, y in cond2_norm.items())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
483
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
484 # # compute theorical count in COND2
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
485 cond2_count = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
486 for z in cond1_norm.itervalues() :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
487 count = int(z * sum2 / 100.0)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
488 cond2_count.append(count)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
489
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
490 expected = array(cond2_count)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
491 observed = array(list(cond2.itervalues()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
492
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
493 AA = get_aa_dict(cond1_norm, cond2_norm)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
494
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
495 max_val = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
496 cond1_aa = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
497 cond2_aa = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
498 aa_name = list(AA.iterkeys())
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
499 for z in AA.itervalues():
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
500 cond1_aa.append(z[0])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
501 cond2_aa.append(z[1])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
502 max_val.append(max(z))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
503
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
504 # # plot amino acid profile :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
505 fig = pl.figure(num=1)
10
rlegendre
parents: 0
diff changeset
506 width = .45
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
507 ax = fig.add_subplot(111)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
508 ind = arange(21)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
509 pl.xlim(0, 21)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
510 #kwargs = {"hatch":'x'}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
511 #ax.bar(ind, cond1_aa, width, facecolor=color1, label=c1, **kwargs)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
512 #kwargs = {"hatch":'.'}
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
513 #ax.bar(ind + width, cond2_aa, width, facecolor=color2, label=c2, **kwargs)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
514 ax.bar(ind, cond1_aa, width, facecolor=color1, label=c1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
515 ax.bar(ind + width, cond2_aa, width, facecolor=color2, label=c2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
516 #for x, y, z in zip(ind, max_val, aa_name):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
517 # ax.text(x + width, y + 0.2, '%s' % z, ha='center', va='bottom', fontsize=14)
10
rlegendre
parents: 0
diff changeset
518 axis_font = {'size':'10'}
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
519 pl.xticks(ind + width, aa_name,**axis_font)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
520 ax.spines['right'].set_visible(False)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
521 ax.spines['top'].set_visible(False)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
522 ax.yaxis.set_ticks_position('left')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
523 ax.xaxis.set_ticks_position('bottom')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
524 #ax.xaxis.set_ticks([])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
525 ax.set_ylabel('Ribosome Occupancy (percent of normalized reads)',**axis_font)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
526 ax.set_xlabel('Amino Acids', **axis_font)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
527 handles, labels = ax.get_legend_handles_labels()
10
rlegendre
parents: 0
diff changeset
528 font_prop = font_manager.FontProperties(size=8)
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
529 ax.legend(handles, labels, prop=font_prop)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
530 pl.savefig(dirout + '/hist_amino_acid.png', format="png", dpi=340)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
531 pl.clf()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
532
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
533 # write result
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
534 with open(outfile, 'w') as out :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
535 out.write('Codon\tRaw_' + c1 + '\tRaw_' + c2 + '\tNorm_' + c1 + '\tNorm_' + c2 + '\tFC(Mut/WT)\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
536 for i in codon_sorted:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
537 out.write(i + '\t' + str(cond1[i]) + '\t' + str(cond2[i]) + '\t' + str(cond1_norm[i]) + '\t' + str(cond2_norm[i]) + '\t' + str(cond2_norm[i] / cond1_norm[i]) + '\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
538 out.write('Khi2 test\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
539 chi = stats.chisquare(observed, expected)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
540 out.write('T : ' + str(chi[0]) + '; p-value : ' + str(chi[1]) + '\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
541
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
542 # # get max value for each codon for histogram
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
543 max_val = [] # # max value for graph
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
544 for i in cond1:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
545 # # max value for each codon
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
546 max_val.append(max(cond1_norm[i], cond2_norm[i]))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
547
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
548 # plot result
10
rlegendre
parents: 0
diff changeset
549 fig = pl.figure(figsize=(30, 10), num=1)
rlegendre
parents: 0
diff changeset
550 #fig = pl.figure(num=1)
rlegendre
parents: 0
diff changeset
551 width = .40
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
552 ind = arange(len(codon_sorted))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
553 ax = fig.add_subplot(111)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
554 pl.xlim(0, len(codon_sorted) + 1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
555 ax.spines['right'].set_color('none')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
556 ax.spines['top'].set_color('none')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
557 ax.xaxis.set_ticks([])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
558 ax.spines['left'].set_smart_bounds(True)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
559 ax.yaxis.set_ticks_position('left')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
560 ax.bar(ind, list(cond1_norm.itervalues()), width, facecolor=color1, label=c1)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
561 ax.bar(ind + width, list(cond2_norm.itervalues()), width, facecolor=color2, label=c2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
562 for x, y, z in zip(ind, max_val, codon_sorted):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
563 ax.text(x + width, y + 0.02, '%s' % z, ha='center', va='bottom', fontsize=8)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
564 ax.set_ylabel('Ribosome Occupancy (percent of normalized reads)')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
565 ax.set_xlabel('Codons')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
566 handles, labels = ax.get_legend_handles_labels()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
567 ax.legend(handles, labels)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
568 pl.savefig(dirout + '/hist_codons.png', format="png", dpi=340)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
569 pl.clf()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
570
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
571
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
572 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
573 stop_err('Error running codon usage plotting : ' + str(e))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
574
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
575
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
576 return (cond1_norm, cond2_norm, chi[1])
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
577
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
578 def write_html_file(html, chi_pval, cond1, cond2):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
579 try :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
580
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
581
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
582 html_str = """
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
583 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
584 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
585
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
586 <html xmlns="http://www.w3.org/1999/xhtml">
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
587 <head>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
588 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
589 <link href="/static/june_2007_style/blue/base.css" media="screen" rel="Stylesheet" type="text/css" />
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
590 </head>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
591 <body>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
592 <h3>Global visualization</h3>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
593 <p>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
594 <h5>Visualization of density footprint in each codon.</h5><br> If user has selected analyse with replicats, standart error deviation between each replicate as plotting as error bar in histogram.<br>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
595 <img border="0" src="hist_codons.png" width="1040"/>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
596 </p>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
597 <p>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
598 <h5>Test for homogeneity distribution between each condition</h5><br>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
599 H0 : %s and %s are same distribution <br>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
600 Khi2 test p-value: %s<br><br>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
601 If p-value less than 0.05, we can reject homogeneity distribution so we can hypothesize that distributions are not the same. Otherwise, we accept H0<br>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
602
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
603 </p>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
604 <p>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
605 <h5>Visualization of density footprint in each codon groupe by amino acid</h5><br>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
606 <img border="0" src="hist_amino_acid.png" width="1040"/>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
607 </p>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
608 </body>
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
609 </html> """ % (cond1,cond2,chi_pval)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
610
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
611
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
612 html_file = open(html, "w")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
613 html_file.write(html_str)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
614 html_file.close()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
615
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
616 except Exception, e :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
617 stop_err('Error during html page creation : ' + str(e))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
618
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
619
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
620
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
621
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
622 def check_codons_list (codons) :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
623
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
624 for codon in codons :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
625 if codon not in init_codon_dict().iterkeys() :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
626 stop_err('Please to enter a valid codon : ' + codon + ' is not find\n')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
627
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
628
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
629 def check_index_bam (bamfile) :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
630 # #testing indexed bam file
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
631 if os.path.isfile(bamfile + ".bai") :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
632 pass
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
633 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
634 cmd = "samtools index %s " % (bamfile)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
635 proc = subprocess.Popen(args=cmd, shell=True, stderr=subprocess.PIPE)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
636 returncode = proc.wait()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
637 # if returncode != 0:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
638 # raise Exception
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
639
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
640 def __main__():
10
rlegendre
parents: 0
diff changeset
641
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
642
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
643 # Parse command line options
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
644 parser = optparse.OptionParser()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
645 parser.add_option("-g", "--gff", dest="gff", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
646 help="gff file", metavar="FILE")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
647
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
648 parser.add_option("-1", "--file1", dest="file1", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
649 help="Bam Ribo-Seq alignments cond 1, if rep option, separate files by commas ", metavar="FILE")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
650
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
651 parser.add_option("-2", "--file2", dest="file2", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
652 help="Bam Ribo-Seq alignments cond 2, if rep option, separate files by commas", metavar="FILE")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
653
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
654 parser.add_option("-c", "--cond1", dest="c1", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
655 help="Name for first condition", metavar="STR")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
656
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
657 parser.add_option("-C", "--cond2", dest="c2", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
658 help="Name of second condition", metavar="STR")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
659
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
660 parser.add_option("-k", "--kmer", dest="kmer", type="int",
10
rlegendre
parents: 0
diff changeset
661 help="Length of your phasing reads", metavar="INT")
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
662
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
663 # parser.add_option("-l", "--list", dest="list_cod", type= "string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
664 # help="list of codons to compare to other", metavar="STR")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
665
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
666 parser.add_option("-o", "--out", dest="outfile", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
667 help="write report to FILE", metavar="FILE")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
668
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
669 parser.add_option("-d", "--dirout", dest="dirout", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
670 help="write report to PNG files", metavar="FILE")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
671
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
672 parser.add_option("-a", "--asite", dest="asite", type="int",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
673 help="Off-set from the 5'end of the footprint to the A-site", metavar="INT")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
674
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
675 parser.add_option("-s", "--site", dest="site", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
676 help="Script can compute in site A, P or E", metavar="A|P|E")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
677
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
678 parser.add_option("-r", "--rep", dest="rep", type="string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
679 help="if replicate or not", metavar="yes|no")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
680
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
681 parser.add_option("-x", "--hex_col1", dest="color1", type= "string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
682 help="Color for first condition", metavar="STR")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
683
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
684 parser.add_option("-X", "--hex_col2", dest="color2", type= "string",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
685 help="Color for second condition", metavar="STR")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
686
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
687 parser.add_option("-q", "--quiet",
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
688 action="store_false", dest="verbose", default=True,
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
689 help="don't print status messages to stdout")
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
690
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
691 (options, args) = parser.parse_args()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
692 print "Begin codon frequency analysis at", time.asctime(time.localtime(time.time()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
693
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
694 try:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
695 authorized_site = ["A", "P", "E"]
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
696 if options.site not in authorized_site :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
697 stop_err(options.site + ' is not a authorized ribosome site')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
698
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
699 ## Check if colors exist
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
700 if not colors.is_color_like(options.color1) :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
701 stop_err( options.color1+' is not a proper color' )
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
702 if not colors.is_color_like(options.color2) :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
703 stop_err( options.color2+' is not a proper color' )
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
704
10
rlegendre
parents: 0
diff changeset
705 ## identify GFF or GTF format from 9th column
rlegendre
parents: 0
diff changeset
706 #with open (options.gff,"r") as gffile :
rlegendre
parents: 0
diff changeset
707 # for line in gffile :
rlegendre
parents: 0
diff changeset
708 # if '#' in line :
rlegendre
parents: 0
diff changeset
709 # ## skip header
rlegendre
parents: 0
diff changeset
710 # gffile.next()
rlegendre
parents: 0
diff changeset
711 # elif 'gene_id' in line :
rlegendre
parents: 0
diff changeset
712 # ## launch gtf reader :
rlegendre
parents: 0
diff changeset
713 # GFF = ribo_functions.store_gtf(options.gff)
rlegendre
parents: 0
diff changeset
714 # break
rlegendre
parents: 0
diff changeset
715 # elif 'ID=' in line :
rlegendre
parents: 0
diff changeset
716 # ## launch gff reader
rlegendre
parents: 0
diff changeset
717 # GFF = ribo_functions.store_gff(options.gff)
rlegendre
parents: 0
diff changeset
718 # break
rlegendre
parents: 0
diff changeset
719 # else :
rlegendre
parents: 0
diff changeset
720 # stop_err( 'Please check your annotation file is in correct format, GFF or GTF' )
rlegendre
parents: 0
diff changeset
721
rlegendre
parents: 0
diff changeset
722 #GFF = store_gff(options.gff)
rlegendre
parents: 0
diff changeset
723 #GFF = ribo_functions.store_gtf(options.gff)
rlegendre
parents: 0
diff changeset
724 ## check gff reading
rlegendre
parents: 0
diff changeset
725 #if not GFF['order'] :
rlegendre
parents: 0
diff changeset
726 # stop_err( 'Incorrect GFF file' + str( e ) )
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
727
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
728 #### NOT USE IN FINAL VERSION
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
729 # # get codon list
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
730 # codons = options.list_cod.upper().split(',')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
731 # check_codons_list(codons)
10
rlegendre
parents: 0
diff changeset
732 GFF = HTSeq.GFF_Reader(options.gff)
0
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
733 # # get html file and directory :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
734 (html, html_dir) = options.dirout.split(',')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
735 if os.path.exists(html_dir):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
736 raise
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
737 try:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
738 os.mkdir(html_dir)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
739 except:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
740 raise Exception(html_dir + ' mkdir')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
741 # #RUN analysis
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
742 # #If there are replicats
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
743 if options.rep == "yes" :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
744 result = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
745 # split name of each file options by ","
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
746 cond1 = options.file1.split(',')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
747 cond2 = options.file2.split(',')
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
748 # # calcul for each file
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
749 for fh in itertools.chain(cond1, cond2):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
750 check_index_bam (fh)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
751 result.append(get_codon_usage(fh, GFF, options.site, options.kmer, options.asite))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
752 (cond1, cond2, chi_pval) = plot_codon_usage(result, html_dir, options.c1, options.c2, options.outfile,options.color1, options.color2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
753 # t_pval = compute_FC_plot(cond1,cond2,codons,html_dir)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
754
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
755
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
756 # #If there are no replicat
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
757 elif options.rep == "no" :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
758 result = []
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
759 # #calcul for each cond
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
760 for fh in (options.file1, options.file2):
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
761 check_index_bam (fh)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
762 result.append(get_codon_usage(fh, GFF, options.site, options.kmer,options.asite))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
763 (cond1, cond2, chi_pval) = plot_codon_usage(result, html_dir, options.c1, options.c2, options.outfile,options.color1, options.color2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
764 # t_pval = compute_FC_plot(cond1,cond2,codons,html_dir)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
765
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
766 else :
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
767 sys.stderr.write("Please enter yes or no for --rep option. Programme aborted at %s" % time.asctime(time.localtime(time.time())))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
768 sys.exit()
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
769
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
770 # write_html_file(html,chi_pval,t_pval,codons,options.c1, options.c2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
771 write_html_file(html, chi_pval, options.c1, options.c2)
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
772
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
773 print "Finish codon frequency analysis at", time.asctime(time.localtime(time.time()))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
774 except Exception, e:
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
775 stop_err('Error running codon frequency analysis (main program) : ' + str(e))
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
776
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
777
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
778 if __name__=="__main__":
b8c070add3b7 Uploaded
rlegendre
parents:
diff changeset
779 __main__()