Mercurial > repos > rlegendre > ribo_tools
changeset 15:702e60e819c2 draft
Uploaded
author | rlegendre |
---|---|
date | Mon, 11 May 2015 09:53:08 -0400 |
parents | 344bacf6acb8 |
children | fcfdb2607cb8 |
files | get_codon_frequency.py metagene_readthrough.py |
diffstat | 2 files changed, 89 insertions(+), 59 deletions(-) [+] |
line wrap: on
line diff
--- a/get_codon_frequency.py Fri Apr 10 03:18:56 2015 -0400 +++ b/get_codon_frequency.py Mon May 11 09:53:08 2015 -0400 @@ -11,7 +11,7 @@ from __future__ import division import os, sys, optparse, tempfile, subprocess, re, shutil, commands, urllib, time import itertools -import math +from math import log10 from decimal import Decimal from Bio import SeqIO from Bio.Seq import Seq @@ -29,7 +29,7 @@ import HTSeq # #libraries for debugg #import pdb -# import cPickle +import cPickle def stop_err(msg): sys.stderr.write("%s\n" % msg) @@ -302,39 +302,42 @@ cond2_2 = result[3].copy() # get codon order in one of list codon_sorted = sorted(cond1_1.iterkeys(), reverse=False) - # get max of each list - sum11 = sum(list(cond1_1.itervalues())) - sum12 = sum(list(cond1_2.itervalues())) - sum21 = sum(list(cond2_1.itervalues())) - sum22 = sum(list(cond2_2.itervalues())) - # for each codon, get values and sd in each condition - cond1_val = {} - cond1 = {} - cond2_val = {} - cond2 = {} - std_cond1 = [] - std_cond2 = [] - max_val = [] # # max value for graph - for i in codon_sorted: - # # cond1 = moyenne of replicats cond1 divided by max - cond1_val[i] = ((cond1_1[i] / sum11 + cond1_2[i] / sum12) / 2) - cond1[i] = ((cond1_1[i] + cond1_2[i]) / 2) - # # standard deviation = absolute value of diffence between replicats of cond1 - std_cond1.append(std(array([(cond1_1[i] * 100 / sum11), (cond1_2[i] * 100 / sum12)]))) - # # cond2 = moyenne of replicats cond1divided by max - cond2_val[i] = ((cond2_1[i] / sum21 + cond2_2[i] / sum22) / 2) - cond2[i] = ((cond2_1[i] + cond2_2[i]) / 2) - # # standard deviation = absolute value of diffence between replicats of cond2 - std_cond2.append(std(array([((cond2_1[i]) * 100 / sum21), ((cond2_2[i]) * 100 / sum22)]))) - # # max value for each codon - max_val.append(max((cond1_1[i] / sum11 + cond1_2[i] / sum12) / 2, (cond2_1[i] / sum21 + cond2_2[i] / sum22) / 2)) - - # for graph design - cond1_norm = OrderedDict(sorted(cond1_val.items(), key=lambda t: t[0])) - cond1_norm.update ((x, y * 100) for x, y in cond1_norm.items()) - cond2_norm = OrderedDict(sorted(cond2_val.items(), key=lambda t: t[0])) - cond2_norm.update ((x, y * 100) for x, y in cond2_norm.items()) - max_val = [x * 100 for x in max_val] + try: + # get max of each list + sum11 = sum(list(cond1_1.itervalues())) + sum12 = sum(list(cond1_2.itervalues())) + sum21 = sum(list(cond2_1.itervalues())) + sum22 = sum(list(cond2_2.itervalues())) + # for each codon, get values and sd in each condition + cond1_val = {} + cond1 = {} + cond2_val = {} + cond2 = {} + std_cond1 = [] + std_cond2 = [] + max_val = [] # # max value for graph + for i in codon_sorted: + # # cond1 = moyenne of replicats cond1 divided by max + cond1_val[i] = ((cond1_1[i] / sum11 + cond1_2[i] / sum12) / 2) + cond1[i] = ((cond1_1[i] + cond1_2[i]) / 2) + # # standard deviation = absolute value of diffence between replicats of cond1 + std_cond1.append(std(array([(cond1_1[i] * 100 / sum11), (cond1_2[i] * 100 / sum12)]))) + # # cond2 = moyenne of replicats cond1divided by max + cond2_val[i] = ((cond2_1[i] / sum21 + cond2_2[i] / sum22) / 2) + cond2[i] = ((cond2_1[i] + cond2_2[i]) / 2) + # # standard deviation = absolute value of diffence between replicats of cond2 + std_cond2.append(std(array([((cond2_1[i]) * 100 / sum21), ((cond2_2[i]) * 100 / sum22)]))) + # # max value for each codon + max_val.append(max((cond1_1[i] / sum11 + cond1_2[i] / sum12) / 2, (cond2_1[i] / sum21 + cond2_2[i] / sum22) / 2)) + + # for graph design + cond1_norm = OrderedDict(sorted(cond1_val.items(), key=lambda t: t[0])) + cond1_norm.update ((x, y * 100) for x, y in cond1_norm.items()) + cond2_norm = OrderedDict(sorted(cond2_val.items(), key=lambda t: t[0])) + cond2_norm.update ((x, y * 100) for x, y in cond2_norm.items()) + max_val = [x * 100 for x in max_val] + except ZeroDivisionError: + stop_err("Not enough reads to compute the codon occupancy") AA = get_aa_dict(cond1_norm, cond2_norm) max_valaa = [] @@ -346,7 +349,7 @@ cond2_aa.append(z[1]) max_valaa.append(max(z)) # # plot amino acid profile : - fig = pl.figure(figsize=(30, 10), num=1) + fig = pl.figure(figsize=(15,10), num=1) width = .50 ax = fig.add_subplot(111) ax.xaxis.set_ticks([]) @@ -386,7 +389,7 @@ # plot result - fig = pl.figure(figsize=(30, 10), num=1) + fig = pl.figure(figsize=(20,10), num=1) width = .40 ind = arange(len(codon_sorted)) ax = fig.add_subplot(111) @@ -406,7 +409,6 @@ ax.legend(handles, labels) pl.savefig(dirout + '/hist_codons.png', format="png", dpi=340) pl.clf() - elif len(result) == 2 : @@ -419,14 +421,16 @@ # pdb.set_trace() # get codon order in one of list codon_sorted = sorted(cond1.iterkeys(), reverse=False) - - # get sum of each list - sum1 = sum(list(cond1.itervalues())) - sum2 = sum(list(cond2.itervalues())) - # #Normalize values by sum of each libraries - cond1_norm.update ((x, (y / sum1) * 100.0) for x, y in cond1_norm.items()) - cond2_norm.update((x, (y / sum2) * 100.0) for x, y in cond2_norm.items()) - + try: + # get sum of each list + sum1 = sum(list(cond1.itervalues())) + sum2 = sum(list(cond2.itervalues())) + # #Normalize values by sum of each libraries + cond1_norm.update ((x, (y / sum1) * 100.0) for x, y in cond1_norm.items()) + cond2_norm.update((x, (y / sum2) * 100.0) for x, y in cond2_norm.items()) + except ZeroDivisionError: + stop_err("Not enough reads to compute the codon occupancy") + # # compute theorical count in COND2 cond2_count = [] for z in cond1_norm.itervalues() : @@ -448,7 +452,7 @@ max_val.append(max(z)) # # plot amino acid profile : - fig = pl.figure(num=1) + fig = pl.figure(figsize=(15,10), num=1) width = .45 ax = fig.add_subplot(111) ind = arange(21) @@ -492,7 +496,7 @@ max_val.append(max(cond1_norm[i], cond2_norm[i])) # plot result - fig = pl.figure(figsize=(40, 10), num=1) + fig = pl.figure(figsize=(20,10), num=1) #fig = pl.figure(num=1) width = .40 ind = arange(len(codon_sorted)) @@ -537,7 +541,7 @@ <body> <h3>Global visualization</h3> <p> - <h5>Visualization of density footprint in each codon.</h5><br> If user has selected analyse with replicats, standart error deviation between each replicate as plotting as error bar in histogram.<br> + <h5>Visualization of density footprint in each codon.</h5><br> If user has selected "Yes" for the replicate option the standard deviation between each replicate is plotted as an error bar in histogram.<br> <img border="0" src="hist_codons.png" width="1040"/> </p> <p> @@ -582,6 +586,29 @@ returncode = proc.wait() # if returncode != 0: # raise Exception + +def plot_fc (cond1, cond2, site, dirout): + + fc = cond1.copy() + + for key, value in fc.iteritems(): + fc[key] = cond2[key]/cond1[key] + + index = arange(len(fc.keys())) + label = fc.keys() + label = [w.replace('T','U') for w in label] + pl.figure(figsize=(15,10), num=1) + ax = pl.subplot(1,1,1) + pl.xticks([]) + pl.scatter(index, fc.values(), color='b') + pl.axhline(y=1,color='r') + pl.xticks(index, label, rotation=90) + pl.ylabel('Foldchange of codon occupancy') + ax.yaxis.set_ticks_position('left') + ax.xaxis.set_ticks_position('bottom') + pl.title(site+" site") + pl.savefig(dirout + '/fc_codons.png', format="png", dpi=340) + def __main__(): @@ -675,7 +702,7 @@ result.append(get_codon_usage(fh, GFF, options.site, options.kmer, options.asite)) (cond1, cond2, chi_pval) = plot_codon_usage(result, html_dir, options.c1, options.c2, options.outfile,options.color1, options.color2) # t_pval = compute_FC_plot(cond1,cond2,codons,html_dir) - + plot_fc (cond1, cond2, options.site, html_dir) # #If there are no replicat elif options.rep == "no" : @@ -686,7 +713,7 @@ result.append(get_codon_usage(fh, GFF, options.site, options.kmer,options.asite)) (cond1, cond2, chi_pval) = plot_codon_usage(result, html_dir, options.c1, options.c2, options.outfile,options.color1, options.color2) # t_pval = compute_FC_plot(cond1,cond2,codons,html_dir) - + plot_fc (cond1, cond2, options.site, html_dir) else : sys.stderr.write("Please enter yes or no for --rep option. Programme aborted at %s" % time.asctime(time.localtime(time.time()))) sys.exit()
--- a/metagene_readthrough.py Fri Apr 10 03:18:56 2015 -0400 +++ b/metagene_readthrough.py Mon May 11 09:53:08 2015 -0400 @@ -38,7 +38,7 @@ try : rpkm = "{0:.4f}".format(count_gene*1000000.0/(count_tot*length)) except ArithmeticError : - stop_err( 'Illegal division by zero') + stop_err( 'Illegal division by zero during computing RPKM') return float(rpkm) @@ -682,12 +682,15 @@ spamreader = csv.reader(csvfile, delimiter='\t') ## skip the header next(spamreader, None) - for row in spamreader: - if row[2] == '-' : - gene_table += '<tr><td>%s</td><td><a href="%s.png" data-lightbox="%s"><img src="%s_thumbnail.png" /></a></td><td><a title="%s">%s</a></td><td>%s</td><td>%s:%s-%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' %(row[0], row[0], row[0], row[0], row[11], row[1], row[3], row[4], row[5], row[6], row[8], row[9], row[10], row[12]) - - gene_table += '</tbody></table>' - + ##test if file is empty or not + if next(spamreader, None): + for row in spamreader: + if row[2] == '-' : + gene_table += '<tr><td>%s</td><td><a href="%s.png" data-lightbox="%s"><img src="%s_thumbnail.png" /></a></td><td><a title="%s">%s</a></td><td>%s</td><td>%s:%s-%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' %(row[0], row[0], row[0], row[0], row[11], row[1], row[3], row[4], row[5], row[6], row[8], row[9], row[10], row[12]) + + gene_table += '</tbody></table>' + else : + gene_table = 'Sorry, there are no stop codon readthrough genes in your data\n' html_str = """ @@ -930,7 +933,7 @@ parser.add_option("-d", "--dirout", dest="dirout", type= "string", help="write report in this html file and in associated directory", metavar="STR,STR") - parser.add_option("-e", "--extend", dest="extend", type= "int", + parser.add_option("-e", "--extend", dest="extend", type= "int",default = 300 , help="Lenght of extension after stop in number of base pairs (depends on your organisme)", metavar="INT") parser.add_option("-q", "--quiet",