Mercurial repository: rlegendre/ribo_tools
changeset 4:eea5fec46e5c
Uploaded
| field | value |
|---|---|
| author | rlegendre |
| date | Mon, 20 Oct 2014 11:07:19 -0400 |
| parents | 8d8552899d20 |
| children | 0b8fa8b7f356 |
| files | metagene_frameshift_analysis.py |
| diffstat | 1 file changed, 884 insertions(+), 0 deletions(-) |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metagene_frameshift_analysis.py	Mon Oct 20 11:07:19 2014 -0400
@@ -0,0 +1,884 @@
+#!/usr/bin/env python2.7
+#-*- coding: utf-8 -*-
+'''
+    Created on sep. 2013
+    @author: rachel legendre
+    @copyright: rachel.legendre@igmors.u-psud.fr
+    @license: GPL v3
+'''
+
+
+import os, sys, optparse, tempfile, subprocess, re, shutil, commands, urllib, time, csv
+import math
+from numpy import arange
+#from matplotlib import pyplot as pl
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as pl
+import itertools
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.Alphabet import IUPAC
+from PIL import Image
+##libraries for debugging
+#import cPickle
+#import pdb
+
+
+##Settings
+MIN_COV_REQUIREMENT = 500
+total_mapped_read = 0
+_MAX = 30
+whole_phasing_table = [[],[],[]]
+
+
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.stderr.write( "Program aborted at %s\n" % time.asctime(time.localtime(time.time())))
+    sys.exit()
+
+
+def split_bam(bamfile,tmpdir):
+    '''
+    Split the bam by chromosome and write one sam file per chromosome in tmpdir
+    '''
+    try:
+        #get header
+        results = subprocess.check_output(['samtools', 'view', '-H', bamfile])
+        header = results.split('\n')
+
+        #define genome size
+        genome = []
+        for line in header:
+            result = re.search('SN', line)
+            if result :
+                #print line
+                feat = line.split('\t')
+                chrom = re.split(":", feat[1])
+                #print feat[1]
+                genome.append(chrom[1])
+
+        #split sam by chrom
+        n = 0
+        for chrm in genome:
+            with open(tmpdir+'/'+chrm+'.sam', 'w') as f :
+                #write header correctly for each chromosome
+                f.write(header[0]+'\n')
+                expr = re.compile(chrm+'\t')
+                el = [elem for elem in header if expr.search(elem)][0]
+                f.write(el+'\n')
+                f.write(header[-2]+'\n')
+                #write all reads for each chromosome
+                reads = subprocess.check_output(["samtools", "view", bamfile, chrm])
+                f.write(reads)
+                #count number of reads
+                n += reads.count(chrm)
+
+        sys.stdout.write("%d reads are present in your bam file\n" % n)
+
+    except Exception, e:
+        stop_err( 'Error during bam file splitting : ' + str( e ) )
+
+
+def get_first_base(tmpdir, kmer):
+    '''
+    Write a footprint coverage file for each sam file in tmpdir
+    '''
+    global total_mapped_read
+    try:
+        file_array = (commands.getoutput('ls '+tmpdir)).split('\n')
+        ##write coverage for each sam file in tmpdir
+        for samfile in file_array:
+            with open(tmpdir+'/'+samfile, 'r') as sam :
+                ##get chromosome name
+                chrom = samfile.split(".sam")[0]
+
+                for line in sam:
+                    #initialize coverage arrays from the @SQ header line
+                    if '@SQ' in line :
+                        size = int(line.split('LN:')[1])
+                        genomeF = [0]*size
+                        genomeR = [0]*size
+                    #define the multiple-read tag used by the mapper
+                    elif '@PG' in line :
+                        if 'bowtie' in line:
+                            multi_tag = "XS:i:"
+                        elif 'bwa' in line:
+                            multi_tag = "XT:A:R"
+                        else :
+                            stop_err("No @PG tag found in "+samfile+". Please use bowtie or bwa for mapping")
+
+                    #get footprint
+                    elif re.search('^[^@].+', line) :
+                        #print line.rstrip()
+                        read_pos = int(line.split('\t')[3])
+                        read_sens = int(line.split('\t')[1])
+                        len_read = len(line.split('\t')[9])
+                        read_qual = int(line.split('\t')[4])
+                        if len_read == kmer and multi_tag not in line and read_qual > 20 :
+                            ###if line.split('\t')[5] == '28M' :
+                            # print line.rstrip()
+                            total_mapped_read += 1
+                            #if it's a forward read
+                            if read_sens == 0 :
+                                #get P-site : start of read + 15 nt
+                                read_pos += 15
+                                genomeF[read_pos] += 1
+                            #if it's a reverse read
+                            elif read_sens == 16 :
+                                #get P-site : start of read + 12 nt
+                                read_pos += 12
+                                genomeR[read_pos] += 1
+
+                #write coverage in files
+                with open(tmpdir+'/assoCov_'+chrom+'.txt', 'w') as cov :
+                    for i in range(0,size):
+                        cov.write(str(genomeF[i])+'\t-'+str(genomeR[i])+'\n')
+                #except Exception,e:
+                #    stop_err( 'Error during coverage file writing : ' + str( e ) )
+            del genomeF
+            del genomeR
+        sys.stdout.write("%d reads are used for frame analysis\n" % total_mapped_read)
+    except Exception, e:
+        stop_err( 'Error during footprinting : ' + str( e ) )
+
+
+def store_gff(gff):
+    '''
+    Parse the gff file and store it in a dictionary
+    '''
+    try:
+        GFF = {}
+        with open(gff, 'r') as f_gff :
+            GFF['order'] = []
+            for line in f_gff:
+                ##skip commented lines
+                line = line.split("#")[0]
+                if line != "" :
+                    feature = (line.split('\t')[8]).split(';')
+                    #gene line
+                    if line.split('\t')[2] == 'gene' :
+                        gene = feature[0].replace("ID=","")
+                        if re.search('gene',feature[2]) :
+                            Name = feature[2].replace("gene=","")
+                        else :
+                            Name = "Unknown"
+                        ##get annotation
+                        note = re.sub(r".+\;Note\=(.+)\;display\=.+", r"\1", line)
+                        note = urllib.unquote(str(note)).replace("\n","")
+                        ##store gene information
+                        GFF['order'].append(gene)
+                        GFF[gene] = {}
+                        GFF[gene]['chrom'] = line.split('\t')[0]
+                        GFF[gene]['start'] = int(line.split('\t')[3])
+                        GFF[gene]['stop'] = int(line.split('\t')[4])
+                        GFF[gene]['strand'] = line.split('\t')[6]
+                        GFF[gene]['name'] = Name
+                        GFF[gene]['note'] = note
+                        GFF[gene]['exon'] = {}
+                        GFF[gene]['exon_number'] = 0
+                        #print Name
+                    elif line.split('\t')[2] == 'CDS' :
+                        gene = re.sub(r".?Parent\=(.+)(_mRNA)+", r"\1", feature[0])
+                        if GFF.has_key(gene) :
+                            GFF[gene]['exon_number'] += 1
+                            exon_number = GFF[gene]['exon_number']
+                            GFF[gene]['exon'][exon_number] = {}
+                            GFF[gene]['exon'][exon_number]['frame'] = line.split('\t')[7]
+                            GFF[gene]['exon'][exon_number]['start'] = int(line.split('\t')[3])
+                            GFF[gene]['exon'][exon_number]['stop'] = int(line.split('\t')[4])
+
+                    ##if there is a five prime UTR intron, we change the start of the gene
+                    elif line.split('\t')[2] == 'five_prime_UTR_intron' :
+                        if GFF[gene]['strand'] == "+" :
+                            GFF[gene]['start'] = GFF[gene]['exon'][1]['start']
+                        else :
+                            GFF[gene]['stop'] = GFF[gene]['exon'][exon_number]['stop']
+        return GFF
+    except Exception,e:
+        stop_err( 'Error during gff storage : ' + str( e ) )
+
+
+#chrI	SGD	gene	87286	87752	.	+	.	ID=YAL030W;Name=YAL030W;gene=SNC1;Alias=SNC1;Ontology_term=GO:0005484,GO:0005768,GO:0005802,GO:0005886,GO:0005935,GO:0006887,GO:0006893,GO:000689
+#7,GO:0006906,GO:0030658,GO:0031201;Note=Vesicle%20membrane%20receptor%20protein%20%28v-SNARE%29%3B%20involved%20in%20the%20fusion%20between%20Golgi-derived%20secretory%20vesicles%20with%20the%20plasma%20membra
+#ne%3B%20proposed%20to%20be%20involved%20in%20endocytosis%3B%20member%20of%20the%20synaptobrevin%2FVAMP%20family%20of%20R-type%20v-SNARE%20proteins%3B%20SNC1%20has%20a%20paralog%2C%20SNC2%2C%20that%20arose%20fr
+#om%20the%20whole%20genome%20duplication;display=Vesicle%20membrane%20receptor%20protein%20%28v-SNARE%29;dbxref=SGD:S000000028;orf_classification=Verified
+#chrI	SGD	CDS	87286	87387	.	+	0	Parent=YAL030W_mRNA;Name=YAL030W_CDS;orf_classification=Verified
+#chrI	SGD	CDS	87501	87752	.	+	0	Parent=YAL030W_mRNA;Name=YAL030W_CDS;orf_classification=Verified
+#chrI	SGD	intron	87388	87500	.	+	.	Parent=YAL030W_mRNA;Name=YAL030W_intron;orf_classification=Verified
+
+
+def __percent__(prop):
+
+    if sum(prop) != 0 :
+        perc = [0,0,0]
+        if prop[0] != 0 :
+            perc[0] = int("{0:.0f}".format((prop[0]*100.0)/sum(prop)))
+        if prop[1] != 0 :
+            perc[1] = int("{0:.0f}".format((prop[1]*100.0)/sum(prop)))
+        if prop[2] != 0 :
+            perc[2] = int("{0:.0f}".format((prop[2]*100.0)/sum(prop)))
+        return perc
+    else :
+        return prop
+
+
+def frame_analysis(tmpdir,subfolder,cutoff,GFF,box_file):
+    '''
+    Take footprints and annotation (gff) and analyse the reading frame in each gene
+    '''
+    global total_mapped_read
+    global whole_phasing_table
+    try:
+        chrom = "" # initializing chromosome
+        nb_gene = 0 # number of analysed genes
+        myheader = ['Gene','Name','Total_proportion','Dual_coding','RPKM','Segment_RPKM','Note']
+
+        with open(subfolder+'/dual_coding.tab',"w") as out :
+            #out.write('Gene\tName\tTotal_proportion\tDual_coding\tRPKM\tSegment_RPKM\tNote\n')
+            writer = csv.writer(out,delimiter='\t')
+            writer.writerow(myheader)
+            whole_phasing = [0,0,0]
+            for gene in GFF['order']:
+                cov = []
+                ##first chromosome : open the corresponding coverage file
+                if chrom == "" :
+                    chrom = GFF[gene]['chrom']
+                    with open(tmpdir+"/assoCov_"+chrom+".txt") as f :
+                        data = f.readlines()
+                ##if we change chromosome
+                elif chrom != GFF[gene]['chrom'] :
+                    chrom = GFF[gene]['chrom']
+                    with open(tmpdir+"/assoCov_"+chrom+".txt") as f :
+                        data = f.readlines()
+
+                ##gene without intron :
+                if GFF[gene]['exon_number'] == 1:
+
+                    ##get coverage for each gene
+                    if GFF[gene]['strand'] == "+":
+                        for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1):
+                            cov.append(int((data[i].rstrip()).split("\t")[0]))
+                    else :
+                        for i in range(GFF[gene]['exon'][1]['start'],GFF[gene]['exon'][1]['stop']+1):
+                            cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
+                        cov.reverse()
+                else :
+                    ##for each gene, get coverage over the summed exons
+                    if GFF[gene]['strand'] == "+":
+                        for exon in range(1,GFF[gene]['exon_number']+1) :
+                            for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
+                                cov.append(int((data[i].rstrip()).split("\t")[0]))
+                    else :
+                        for exon in range(1,GFF[gene]['exon_number']+1) :
+                            for i in range(GFF[gene]['exon'][exon]['start'],GFF[gene]['exon'][exon]['stop']+1):
+                                cov.append(int(((data[i].rstrip()).split("\t")[1]).replace("-","")))
+                        cov.reverse()
+
+                len_cov = len(cov)
+                #segmentation by gene size
+                if len_cov < 1000 :
+                    nb_segment = 3
+                elif len_cov > 1000 and len_cov < 2000:
+                    nb_segment = 6
+                else :
+                    nb_segment = 9
+                ##number of aa in gene :
+                nb_aa = len_cov/3
+                len_frag = len_cov/nb_segment
+                prop = [0,0,0]
+                seg_prop = []
+                tot_prop = prop
+                tot_rpkm = 0
+                seg_rpkm = []
+                window = 0
+                GFF[gene]['dual_coding'] = '-'
+                ''' segmentation and total proportions '''
+                if sum(cov) > MIN_COV_REQUIREMENT:
+                    nb_gene += 1
+                    for nuc in range(0,len_cov-2,3) :
+                        window += 1
+                        tot_rpkm += sum(cov[nuc:nuc+2])
+                        ##calculate proportion
+                        prop[0] += cov[nuc]
+                        prop[1] += cov[nuc+1]
+                        prop[2] += cov[nuc+2]
+                        #segmented proportion
+                        if window == nb_aa/nb_segment:
+                            seg_prop.append(__percent__(prop))
+                            whole_phasing = (map(lambda (x, y): x + y, zip(whole_phasing, prop)))
+                            tot_prop = (map(lambda (x, y): x + y, zip(tot_prop, prop)))
+                            seg_rpkm.append("{0:.2f}".format((tot_rpkm*1000000.0)/(total_mapped_read*len_frag)))
+                            window = 0
+                            tot_rpkm = 0
+                            prop = [0,0,0]
+
+                    GFF[gene]['total_prop'] = __percent__(tot_prop)
+                    GFF[gene]['seg_prop'] = seg_prop
+                    GFF[gene]['seg_rpkm'] = seg_rpkm
+                    tot_rpkm = "{0:.2f}".format(sum(cov)*1000000.0/(total_mapped_read*len_cov))
+                    GFF[gene]['total_rpkm'] = tot_rpkm
+                    ##flag genes with non standard proportions
+                    idx = 0
+                    for seg in GFF[gene]['seg_prop'] :
+                        if (seg[0] < cutoff) and (sum(seg) != 0 and GFF[gene]['seg_rpkm'][idx] != 0.0) :
+                            GFF[gene]['dual_coding'] = 'yes'
+                        idx += 1
+                    for z in range(0,3) :
+                        whole_phasing_table[z].append(__percent__(tot_prop)[z])
+                else :
+                    for nuc in range(0,len_cov-2,3) :
+                        ##calculate proportion
+                        prop[0] += cov[nuc]
+                        prop[1] += cov[nuc+1]
+                        prop[2] += cov[nuc+2]
+                    whole_phasing = (map(lambda (x, y): x + y, zip(whole_phasing, prop)))
+                    #for z in range(0,3) :
+                    #    whole_phasing_table[z].append(__percent__(tot_prop)[z])
+
+                if GFF[gene]['dual_coding'] == 'yes' :
+                    mylist = [gene,GFF[gene]['name'],GFF[gene]['total_prop'],GFF[gene]['seg_prop'],GFF[gene]['total_rpkm'],GFF[gene]['seg_rpkm'],GFF[gene]['note']]
+                    #out.write(gene+'\t'+GFF[gene]['name']+'\t'+str(GFF[gene]['total_prop'])+'\t'+str(GFF[gene]['seg_prop'])+'\t'+str(GFF[gene]['total_rpkm'])+'\t'+GFF[gene]['note']+'\n')
+                    writer.writerow(mylist)
+                    GFF[gene]['coverage'] = cov
+                else :
+                    del GFF[gene]
+
+            del GFF['order']
+
+        whole_phasing = __percent__(whole_phasing)
+        sys.stdout.write("Proportion of reads in each frame :\n%s\n" % whole_phasing)
+        sys.stdout.write("%d genes are used for frame analysis\n" % nb_gene)
+
+        ##produce boxplot
+        pl.boxplot(whole_phasing_table,notch=True,sym='ro')
+        pl.ylabel('Total number of footprints (percent)')
+        pl.draw()
+        pl.savefig(subfolder+'/boxplot.png',format='png', dpi=640)
+        pl.savefig(box_file, format='png', dpi=640)
+        pl.clf()
+        return GFF
+
+    except Exception, e:
+        stop_err( 'Error during frame analysis : ' + str( e ) )
+
+
+def get_orf(fasta,gene):
+    '''
+    Compute ORFs in each gene and return lists of methionine and stop codon positions in each frame
+    '''
+    try :
+        SeqDict = SeqIO.to_dict(SeqIO.parse(open(fasta,"r"),"fasta"))
+        if gene['exon_number'] == 1 :
+            if gene['strand'] == '-' :
+                gene_seq = str(SeqDict[gene['chrom']].seq[gene['start']-2:gene['stop']+1].reverse_complement())
+            else :
+                gene_seq = str(SeqDict[gene['chrom']].seq[gene['start']-2:gene['stop']+1])
+        else :
+            ##get sequence of gene with intron, with -1 and +1 nt to detect all phases
+            gene_seq = str(SeqDict[gene['chrom']].seq[gene['start']-2:gene['start']-1])
+            for exon in range(1,gene['exon_number']+1) :
+                gene_seq = gene_seq+str(SeqDict[gene['chrom']].seq[gene['exon'][exon]['start']-1:gene['exon'][exon]['stop']])
+            gene_seq = gene_seq+str(SeqDict[gene['chrom']].seq[gene['stop']:gene['stop']+1])
+            if gene['strand'] == '-' :
+                my_seq = Seq(gene_seq,IUPAC.unambiguous_dna).reverse_complement()
+                #gene_seq = my_seq.tostring() DEPRECATED
+                gene_seq = str(my_seq)
+        #initialize start/stop arrays : *_1 is frame 0, *_2 is frame +1, *_3 is frame -1
+        stop_1 = []
+        stop_2 = []
+        stop_3 = []
+        met_1 = []
+        met_2 = []
+        met_3 = []
+        stops = ['TAA','TGA','TAG']
+        for z in range(0,len(gene_seq),3):
+            # Frame -1
+            if gene_seq[z:z+3] == 'ATG' :
+                met_3.append(z)
+            elif gene_seq[z:z+3] in stops :
+                stop_3.append(z)
+            # Frame 0
+            if gene_seq[z+1:z+4] == 'ATG' :
+                met_1.append(z+1)
+            elif gene_seq[z+1:z+4] in stops :
+                stop_1.append(z+1)
+            # Frame +1
+            if gene_seq[z+2:z+5] == 'ATG' :
+                met_2.append(z+2)
+            elif gene_seq[z+2:z+5] in stops :
+                stop_2.append(z+2)
+
+        return gene_seq,met_1,stop_1,met_2,stop_2,met_3,stop_3
+
+    except Exception, e:
+        stop_err( 'Error during ORF computing : ' + str( e ) )
+
+
+def plot_subprofile(GFF, directory,fasta) :
+    '''
+    Plot footprint subprofiles and ORFs for each gene in the GFF dict
+    '''
+    global _MAX
+    try :
+        for gene in GFF.iterkeys():
+            nb_aa = len(GFF[gene]['coverage'])/3
+            subprofile_1 = []
+            subprofile_2 = []
+            subprofile_3 = []
+            cov = GFF[gene]['coverage']
+
+            for z in range(0,len(cov)-2,3):
+                for i in range(3):
+                    subprofile_1.append(cov[z])
+                    subprofile_2.append(cov[z+1])
+                    subprofile_3.append(cov[z+2])
+
+            ##normalisation of density plot
+            tot = max(cov)
+            subprofile_1 = [(x * _MAX/tot) for x in subprofile_1]
+            subprofile_2 = [(x * _MAX/tot) for x in subprofile_2]
+            subprofile_3 = [(x * _MAX/tot) for x in subprofile_3]
+
+            if len(subprofile_1) == len(subprofile_2) and len(subprofile_1) == len(subprofile_3) :
+
+                ind = arange(len(subprofile_1))
+                fig = pl.figure(num=1)
+                #sub profile frame 0
+                pl.subplot(4,1,1)
+                ax1 = fig.add_subplot(4,1,1)
+                pl.title( "%s (%s)\n$(%s RPKM)$"% (gene, GFF[gene]['name'], GFF[gene]['total_rpkm']))
+                pl.ylim(0,_MAX)
+                pl.xlim(0,len(subprofile_1))
+                pl.yticks([0, _MAX/2, _MAX])
+                ax1.plot(ind, subprofile_1, 'b-', lw = 3,label = 'Frame 0')
+                ax1.fill_between(ind, 0,subprofile_1,color='b')
+                ax1.legend(loc=1,prop={'size':8})
+                pl.ylabel('#RPF')
+
+                ##sub profile frame +1
+                pl.subplot(4,1,2)
+                ax2 = fig.add_subplot(4,1,2)
+                pl.ylim(0,_MAX)
+                pl.xlim(0,len(subprofile_2))
+                pl.yticks([0, _MAX/2, _MAX])
+                ax2.plot(ind,subprofile_2,'r-',lw = 3,label = 'Frame +1')
+                ax2.fill_between(ind, 0,subprofile_2,color='r')
+                ax2.legend(loc=1,prop={'size':8})
+                pl.ylabel('#RPF')
+
+                ##sub profile frame -1
+                pl.subplot(4,1,3)
+                ax3 = fig.add_subplot(4,1,3)
+                pl.ylim(0,_MAX)
+                pl.yticks([0, _MAX/2, _MAX])
+                pl.xlim(0,len(subprofile_3))
+                ax3.plot(ind,subprofile_3,'g-',lw =3,label = 'Frame -1')
+                ax3.fill_between(ind, 0,subprofile_3,color='g')
+                ax3.legend(loc=1,prop={'size':8})
+                pl.ylabel('#RPF')
+
+                ##get orf for each frame
+                gene_seq,met_1,stop_1,met_2,stop_2,met_3,stop_3 = get_orf(fasta,GFF[gene])
+                ##ORF plot
+                idc = arange(len(gene_seq))
+                ##define axis for each frame :
+                x = [0]*len(gene_seq)
+                y = [1]*len(gene_seq)
+                z = [2]*len(gene_seq)
+                ##define last subplot
+                #pdb.set_trace()
+                pl.subplot(4,1,4)
+                ##modify axes settings
+                ax4 = fig.add_subplot(4,1,4)
+                ax4.spines['right'].set_color('none')
+                ax4.spines['top'].set_color('none')
+                ax4.spines['left'].set_color('none')
+                ax4.spines['bottom'].set_color('none')
+                ax4.tick_params(bottom = 'off',top='off',left='off', right='off')
+                pl.ylim(0,3)
+                pl.xlim(0,len(gene_seq))
+                pl.yticks([0.5,1.5,2.5],[r'-1',r'+1',r'0'])
+                pl.xticks([])
+                ##plot lines delimiting frames and arrows corresponding to starts and stops
+                pl.plot(idc,x,'k',lw=2)
+                for val in met_1:
+                    pl.arrow( val, 2, 0.0, 0.5, fc="g", ec="g",lw=2)
+                for val in stop_1:
+                    pl.arrow( val, 2, 0.0, 1, fc="r", ec="r",lw=2)
+                pl.plot(idc,y,'k')
+                for val in met_2:
+                    pl.arrow( val, 1, 0.0, 0.5, fc="g", ec="g",lw=2)
+                for val in stop_2:
+                    pl.arrow( val, 1, 0.0, 1, fc="r", ec="r",lw=2)
+                pl.plot(idc,z,'k')
+                for val in met_3:
+                    pl.arrow( val, 0, 0.0, 0.5, fc="g", ec="g",lw=2)
+                for val in stop_3:
+                    pl.arrow( val, 0, 0.0, 1, fc="r", ec="r",lw=2)
+
+                pl.ylabel('ORF',fontsize='small')
+                pl.draw()
+                pl.savefig(directory+'/'+gene+'.png',format='png', dpi=600)
+                pl.clf()
+                ##pl.show()
+
+                ##make thumbnail for html page
+                infile = directory+'/'+gene+'.png'
+                size = 128, 128
+                im = Image.open(infile)
+                im.thumbnail(size, Image.ANTIALIAS)
+                im.save(directory+'/'+gene + "_thumbnail.png", "PNG")
+
+            else :
+                stop_err( 'Error during plot : lengths of subprofiles are different' )
+
+    except Exception, e:
+        stop_err( 'Error during subprofiles plotting : ' + str( e ) )
+
+
+def write_html_page(html,subfolder,GFF) :
+
+    try :
+        gene_table = ''
+        gene_table += '<table>'
+        gene_table += '<thead><tr><th data-sort="string">Gene</th><th>Plot</th><th data-sort="string">Name</th><th>Total Proportion</th><th>Segment proportion</th><th data-sort="float">RPKM</th><th>Segment RPKM</th></tr></thead><tbody>'
+        for gene in GFF.iterkeys():
+            gene_table += '<tr><td>%s</td><td><a href="%s.png" data-lightbox="%s"><img src="%s_thumbnail.png" /></a></td><td><a title="%s">%s</a></td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' %(gene,gene,gene,gene,GFF[gene]['note'],GFF[gene]['name'],GFF[gene]['total_prop'],GFF[gene]['seg_prop'],GFF[gene]['total_rpkm'],GFF[gene]['seg_rpkm'])
+        gene_table += '</tbody></table>'
+
+        html_str = """
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+    <head>
+        <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" />
+        <script src="http://code.jquery.com/jquery-1.10.2.min.js"></script>
+        <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
+        <script src="http://code.jquery.com/ui/1.10.3/jquery-ui.js"></script>
+        <script src="lightbox/js/jquery-1.10.2.min.js"></script>
+        <script src="lightbox/js/lightbox-2.6.min.js"></script>
+        <link href="lightbox/css/lightbox.css" rel="stylesheet" />
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+        <title>Dual coding result file</title>
+        <link rel="stylesheet" href="http://code.jquery.com/ui/1.10.3/themes/smoothness/jquery-ui.css" />
+        <script>
+        (function(d){d.fn.stupidtable=function(b){return this.each(function(){var a=d(this);b=b||{};b=d.extend({},d.fn.stupidtable.default_sort_fns,b);var n=function(a,b){for(var f=[],c=0,e=a.slice(0).sort(b),h=0;h<a.length;h++){for(c=d.inArray(a[h],e);-1!=d.inArray(c,f);)c++;f.push(c)}return f},q=function(a,b){for(var d=a.slice(0),c=0,e=0;e<b.length;e++)c=b[e],d[c]=a[e];return d};a.on("click","th",function(){var m=a.children("tbody").children("tr"),g=d(this),f=0,c=d.fn.stupidtable.dir;a.find("th").slice(0, g.index()).each(function(){var a=d(this).attr("colspan")||1;f+=parseInt(a,10)});var e=g.data("sort-default")||c.ASC;g.data("sort-dir")&&(e=g.data("sort-dir")===c.ASC?c.DESC:c.ASC);var h=g.data("sort")||null;null!==h&&(a.trigger("beforetablesort",{column:f,direction:e}),a.css("display"),setTimeout(function(){var l=[],p=b[h];m.each(function(a,b){var c=d(b).children().eq(f),e=c.data("sort-value"),c="undefined"!==typeof e?e:c.text();l.push(c)});var k;k=e==c.ASC?n(l,p):n(l,function(a,b){return-p(a,b)}); a.find("th").data("sort-dir",null).removeClass("sorting-desc sorting-asc");g.data("sort-dir",e).addClass("sorting-"+e);k=d(q(m,k));a.children("tbody").remove();a.append("<tbody />").append(k);a.trigger("aftertablesort",{column:f,direction:e});a.css("display")},10))})})};d.fn.stupidtable.dir={ASC:"asc",DESC:"desc"};d.fn.stupidtable.default_sort_fns={"int":function(b,a){return parseInt(b,10)-parseInt(a,10)},"float":function(b,a){return parseFloat(b)-parseFloat(a)},string:function(b,a){return b<a?-1: b>a?1:0},"string-ins":function(b,a){b=b.toLowerCase();a=a.toLowerCase();return b<a?-1:b>a?1:0}}})(jQuery);
+        (function($) {
+
+            $.fn.stupidtable = function(sortFns) {
+                return this.each(function() {
+                    var $table = $(this);
+                    sortFns = sortFns || {};
+
+                    // ==================================================== //
+                    //                 Utility functions                    //
+                    // ==================================================== //
+
+                    // Merge sort functions with some default sort functions.
+                    sortFns = $.extend({}, $.fn.stupidtable.default_sort_fns, sortFns);
+
+                    // Return the resulting indexes of a sort so we can apply
+                    // this result elsewhere. This returns an array of index numbers.
+                    // return[0] = x means "arr's 0th element is now at x"
+                    var sort_map = function(arr, sort_function) {
+                        var map = [];
+                        var index = 0;
+                        var sorted = arr.slice(0).sort(sort_function);
+                        for (var i=0; i<arr.length; i++) {
+                            index = $.inArray(arr[i], sorted);
+
+                            // If this index is already in the map, look for the next index.
+                            // This handles the case of duplicate entries.
+                            while ($.inArray(index, map) != -1) {
+                                index++;
+                            }
+                            map.push(index);
+                        }
+
+                        return map;
+                    };
+
+                    // Apply a sort map to the array.
+                    var apply_sort_map = function(arr, map) {
+                        var clone = arr.slice(0),
+                            newIndex = 0;
+                        for (var i=0; i<map.length; i++) {
+                            newIndex = map[i];
+                            clone[newIndex] = arr[i];
+                        }
+                        return clone;
+                    };
+
+                    // ==================================================== //
+                    //                  Begin execution!                    //
+                    // ==================================================== //
+
+                    // Do sorting when THs are clicked
+                    $table.on("click", "th", function() {
+                        var trs = $table.children("tbody").children("tr");
+                        var $this = $(this);
+                        var th_index = 0;
+                        var dir = $.fn.stupidtable.dir;
+
+                        $table.find("th").slice(0, $this.index()).each(function() {
+                            var cols = $(this).attr("colspan") || 1;
+                            th_index += parseInt(cols,10);
+                        });
+
+                        // Determine (and/or reverse) sorting direction, default `asc`
+                        var sort_dir = $this.data("sort-default") || dir.ASC;
+                        if ($this.data("sort-dir"))
+                            sort_dir = $this.data("sort-dir") === dir.ASC ? dir.DESC : dir.ASC;
+
+                        // Choose appropriate sorting function.
+                        var type = $this.data("sort") || null;
+
+                        // Prevent sorting if no type defined
+                        if (type === null) {
+                            return;
+                        }
+
+                        // Trigger `beforetablesort` event that calling scripts can hook into;
+                        // pass parameters for sorted column index and sorting direction
+                        $table.trigger("beforetablesort", {column: th_index, direction: sort_dir});
+                        // More reliable method of forcing a redraw
+                        $table.css("display");
+
+                        // Run sorting asynchronously on a timeout to force browser redraw after
+                        // `beforetablesort` callback. Also avoids locking up the browser too much.
+                        setTimeout(function() {
+                            // Gather the elements for this column
+                            var column = [];
+                            var sortMethod = sortFns[type];
+
+                            // Push either the value of the `data-order-by` attribute if specified
+                            // or just the text() value in this column to column[] for comparison.
+                            trs.each(function(index,tr) {
+                                var $e = $(tr).children().eq(th_index);
+                                var sort_val = $e.data("sort-value");
+                                var order_by = typeof(sort_val) !== "undefined" ? sort_val : $e.text();
+                                column.push(order_by);
+                            });
+
+                            // Create the sort map. This column having a sort-dir implies it was
+                            // the last column sorted. As long as no data-sort-desc is specified,
+                            // we're free to just reverse the column.
+                            var theMap;
+                            if (sort_dir == dir.ASC)
+                                theMap = sort_map(column, sortMethod);
+                            else
+                                theMap = sort_map(column, function(a, b) { return -sortMethod(a, b); });
+
+                            // Reset siblings
+                            $table.find("th").data("sort-dir", null).removeClass("sorting-desc sorting-asc");
+                            $this.data("sort-dir", sort_dir).addClass("sorting-"+sort_dir);
+
+                            var sortedTRs = $(apply_sort_map(trs, theMap));
+                            $table.children("tbody").remove();
+                            $table.append("<tbody />").append(sortedTRs);
+
+                            // Trigger `aftertablesort` event. Similar to `beforetablesort`
+                            $table.trigger("aftertablesort", {column: th_index, direction: sort_dir});
+                            // More reliable method of forcing a redraw
+                            $table.css("display");
+                        }, 10);
+                    });
+                });
+            };
+
+            // Enum containing sorting directions
+            $.fn.stupidtable.dir = {ASC: "asc", DESC: "desc"};
+
+            $.fn.stupidtable.default_sort_fns = {
+                "int": function(a, b) {
+                    return parseInt(a, 10) - parseInt(b, 10);
+                },
+                "float": function(a, b) {
+                    return parseFloat(a) - parseFloat(b);
+                },
+                "string": function(a, b) {
+                    if (a < b) return -1;
+                    if (a > b) return +1;
+                    return 0;
+                },
+                "string-ins": function(a, b) {
+                    a = a.toLowerCase();
+                    b = b.toLowerCase();
+                    if (a < b) return -1;
+                    if (a > b) return +1;
+                    return 0;
+                }
+            };
+
+        })(jQuery);
+        $(function(){
+            var table = $("table").stupidtable();
+
+            table.on("beforetablesort", function (event, data) {
+                // data.column - the index of the column sorted after a click
+                // data.direction - the sorting direction (either asc or desc)
+                $("#msg").text("Sorting index " + data.column)
+            });
+            table.on("aftertablesort", function (event, data) {
+                var th = $(this).find("th");
+                th.find(".arrow").remove();
+                var dir = $.fn.stupidtable.dir;
+                var arrow = data.direction === dir.ASC ? "↑" : "↓";
+                th.eq(data.column).append('<span class="arrow">' + arrow +'</span>');
+            });
+        });
+        </script>
+        <style type="text/css">
+            label {
+                display: inline-block;
+                width: 5em;
+            }
+            table {
+                border-collapse: collapse;
+            }
+            th, td {
+                padding: 5px 10px;
+                border: 1px solid #999;
+            }
+            th {
+                background-color: #a7d3ff;
+            }
+            th[data-sort]{
+                cursor:pointer;
+            }
+            a[data-lightbox]{
+                cursor:zoom-in;
+            }
+            #msg {
+                color: green;
+            }
+        </style>
+    </head>
+    <body>
+        <h1>Dual coding analysis results</h1>
+        %s
+    </body>
+</html> """ % (gene_table)
+
+        html_file = open(html,"w")
+        html_file.write(html_str)
+        html_file.close()
+        html_index = open(subfolder+'/index.html','w')
+        html_index.write(html_str)
+        html_index.close()
+
+    except Exception, e :
+        stop_err('Error during html page creation : ' + str( e ) )
+
+
+def __main__():
+
+    #python /home/rlegendre/galaxy/galaxy-dist/tools/rib_profiling/metagene_frameshift_analysis.py -i /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.gff -f /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.fa -b /data/BY_project/BY_BAM/LMA830_sort.bam -o /data/BY_project/dual_coding_lma.csv -d /data/BY_project/LMA_dual/index.html,/data/BY_project/LMA_dual -c 60
+    #python /home/rlegendre/galaxy/galaxy-dist/tools/rib_profiling/metagene_frameshift_analysis.py -i /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.gff -f /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.fa -b /home/rlegendre/Documents/PDE2S/PDE2S_trim_no_rRNA_28mer.bam -o /home/rlegendre/Documents/PDE2S/dual_coding_pde2s.csv -d /home/rlegendre/Documents/PDE2S/index.html,/home/rlegendre/Documents/PDE2S/PDE2S_dual -c 60
+    #python /home/rlegendre/galaxy/galaxy-dist/tools/rib_profiling/metagene_frameshift_analysis.py -i /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.gff -f /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/Saccer3.fa -b /home/rlegendre/galaxy/galaxy-dist/SharedData/Ribo/RPF_psi+_28sorted.bam -o /home/rlegendre/Documents/test_rpkmlesszero.tab -d /home/rlegendre/Documents/index.html,/home/rlegendre/Documents/plouf -c 60
+
+    #Parse command line options
+    parser = optparse.OptionParser()
+    parser.add_option("-i", "--input", dest="gfffile", type= "string",
+                      help="GFF annotation file", metavar="FILE")
+
+    parser.add_option("-b", "--bam", dest="bamfile", type= "string",
+                      help="BAM Ribo-Seq alignments", metavar="FILE")
+
+    parser.add_option("-f", "--fasta", dest="fasta", type= "string",
+                      help="Fasta file", metavar="FILE")
+
+    parser.add_option("-d", "--dirout", dest="dirout", type= "string",
+                      help="write report in this html file and in associated directory", metavar="STR,STR")
+
+    parser.add_option("-x", "--boxplot", dest="box", type= "string",
+                      help="report boxplot in this file", metavar="STR,STR")
+
+    parser.add_option("-c", "--cutoff", dest="cutoff", type= "int",
+                      help="Integer value for selecting all genes that have less than 60 % (default) of reads in coding frame", metavar="INT")
+
+    parser.add_option("-k", "--kmer", dest="kmer", type= "int",
+                      help="Length of your phasing reads", metavar="INT")
+
+    parser.add_option("-q", "--quiet",
+                      action="store_false", dest="verbose", default=True,
+                      help="don't print status messages to stdout")
+
+    (options, args) = parser.parse_args()
+    sys.stdout.write("Begin frame analysis at %s\n" % time.asctime( time.localtime(time.time())))
+    # Create temp dir
+    try:
+        tmp_dir = tempfile.mkdtemp()
+        (html_file, subfolder ) = options.dirout.split(",")
+
+        ##testing indexed bam file
+        if os.path.isfile(options.bamfile+".bai") :
+            pass
+        else :
+            cmd = "samtools index %s " % (options.bamfile)
+            proc = subprocess.Popen( args=cmd, shell=True, stderr = subprocess.PIPE)
+            returncode = proc.wait()
+
+        ##RUN analysis
+        split_bam(options.bamfile,tmp_dir)
+        get_first_base (tmp_dir,options.kmer)
+        GFF = store_gff(options.gfffile)
+        #
+        #### cPickles for Test ####
+        #if os.path.isfile("/home/rlegendre/Documents/FrameShift/OutOfFrameWithIntron/pyt_dict") :
+        #    with open("/home/rlegendre/Documents/FrameShift/OutOfFrameWithIntron/pyt_dict",'rb') as fp:
+        #        GFF = cPickle.load(fp)
+        #else :
+        #    split_bam(options.bamfile,tmp_dir)
+        #    get_first_base (tmp_dir)
+        #    GFF = store_gff(options.gfffile)
+        #    GFF = frame_analysis (tmp_dir,options.outfile,options.cutoff,GFF,subfolder)
+        #    with open("/home/rlegendre/OutOfFrameWithIntron/pyt_dict",'wb') as fp:
+        #        cPickle.dump(GFF,fp)
+        #
+        #### cPickles for Test ####
+
+        if os.path.exists(subfolder):
+            raise
+        try:
+            os.mkdir(subfolder)
+        except:
+            raise
+        GFF = frame_analysis (tmp_dir,subfolder,options.cutoff,GFF, options.box)
+        plot_subprofile (GFF,subfolder,options.fasta)
+        write_html_page(html_file,subfolder,GFF)
+
+        ##copy jquery/lightbox scripts into result directory :
+        jq_src = os.path.join(os.path.dirname(__file__),'lightbox')
+        shutil.copytree(jq_src,os.path.join(subfolder,'lightbox'))
+
+        sys.stdout.write("Finish frame analysis at %s\n" % time.asctime( time.localtime(time.time())))
+    except Exception, e:
+        stop_err( 'Error running metagene analysis : ' + str( e ) )
+
+    if os.path.exists( tmp_dir ):
+        shutil.rmtree( tmp_dir )
+
+
+if __name__=="__main__":
+    __main__()
\ No newline at end of file
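
For illustration only (not part of the committed file): the per-gene phasing test that frame_analysis() and __percent__() implement above boils down to summing P-site counts by codon position, converting the three sums to percentages, and comparing the frame-0 share against the cutoff passed with -c. The sketch below is a minimal standalone version of that idea; the coverage values and the names frame_proportions/coverage are made up for this example.

```python
# Minimal sketch of the phasing computation, assuming a per-nucleotide
# P-site coverage vector over a CDS (one value per nucleotide, frame 0 first).

def frame_proportions(cov):
    """Sum P-site counts by codon position and return rounded percentages."""
    prop = [0, 0, 0]
    for nuc in range(0, len(cov) - 2, 3):
        prop[0] += cov[nuc]      # frame 0
        prop[1] += cov[nuc + 1]  # frame +1
        prop[2] += cov[nuc + 2]  # frame -1
    total = sum(prop)
    if total == 0:
        return prop
    return [int(round(p * 100.0 / total)) for p in prop]

# Hypothetical coverage for a short CDS (12 nt, i.e. 4 codons).
coverage = [12, 1, 0, 9, 2, 1, 15, 0, 3, 11, 1, 2]
percents = frame_proportions(coverage)   # -> [82, 7, 11]
dual_coding = percents[0] < 60            # 60 = example value for the -c cutoff
```

In the tool itself the same proportions are also computed per segment (3, 6, or 9 segments depending on CDS length), and a gene is reported in dual_coding.tab when a segment's frame-0 share drops below the -c cutoff while that segment still has coverage.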