# HG changeset patch # User davidvanzessen # Date 1450700617 18000 # Node ID 431797cd74c81dca3c71d801f9075b99580172ea # Parent ee1bda8c27c8cd2193ae275974ce5dcce784e0e8 Uploaded diff -r ee1bda8c27c8 -r 431797cd74c8 imgt_loader/imgt_loader.py --- a/imgt_loader/imgt_loader.py Mon Nov 09 08:28:31 2015 -0500 +++ b/imgt_loader/imgt_loader.py Mon Dec 21 07:23:37 2015 -0500 @@ -93,7 +93,8 @@ r"(IGLV[0-9]-[0-9]{1,2})", r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)", r"(TRGV[234589])", - r"(TRDV[1-3])"] + r"(TRDV[1-3])", + r"(IGHV[0-9]S[0-9]+)"] dPattern = [r"(IGHD[0-9]-[0-9ab]+)", r"(TRBD[12])", diff -r ee1bda8c27c8 -r 431797cd74c8 report_clonality/RScript.r --- a/report_clonality/RScript.r Mon Nov 09 08:28:31 2015 -0500 +++ b/report_clonality/RScript.r Mon Dec 21 07:23:37 2015 -0500 @@ -44,6 +44,8 @@ # ---------------------- Data preperation ---------------------- +print("Report Clonality - Data preperation") + inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") setwd(outdir) @@ -114,6 +116,8 @@ # ---------------------- Counting the productive/unproductive and unique sequences ---------------------- +print("Report Clonality - counting productive/unproductive/unique") + if(!("Functionality" %in% inputdata)){ #add a functionality column to the igblast data inputdata$Functionality = "unproductive" search = (inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND") @@ -177,6 +181,8 @@ # ---------------------- Frequency calculation for V, D and J ---------------------- +print("Report Clonality - frequency calculation V, D and J") + PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) @@ -194,6 +200,8 @@ # ---------------------- Setting up the gene names for the different species/loci ---------------------- +print("Report Clonality - getting genes for species/loci") + Vchain = "" Dchain = "" Jchain = "" @@ -233,7 +241,9 @@ useD = FALSE cat("No D Genes in this species/locus") } -print(paste("useD:", useD)) +print(paste(nrow(Vchain), "genes in V")) +print(paste(nrow(Dchain), "genes in D")) +print(paste(nrow(Jchain), "genes in J")) # ---------------------- merge with the frequency count ---------------------- @@ -245,6 +255,8 @@ # ---------------------- Create the V, D and J frequency plots and write the data.frame for every plot to a file ---------------------- +print("Report Clonality - V, D and J frequency plots") + pV = ggplot(PRODFV) pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") @@ -285,6 +297,8 @@ # ---------------------- Now the frequency plots of the V, D and J families ---------------------- +print("Report Clonality - V, D and J family plots") + VGenes = PRODF[,c("Sample", "Top.V.Gene")] VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene) VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")]) @@ -334,6 +348,8 @@ # ---------------------- Plotting the cdr3 length ---------------------- +print("Report Clonality - CDR3 length plot") + CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length.DNA")]) TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample]) CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample") @@ -350,7 +366,6 @@ # ---------------------- Plot the heatmaps ---------------------- - #get the reverse order for the V and D genes revVchain = Vchain revDchain = Dchain @@ -358,6 +373,7 @@ revDchain$chr.orderD = rev(revDchain$chr.orderD) if(useD){ + print("Report Clonality - Heatmaps VD") plotVD <- function(dat){ if(length(dat[,1]) == 0){ return() @@ -389,10 +405,11 @@ completeVD = merge(completeVD, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) VDList = split(completeVD, f=completeVD[,"Sample"]) - lapply(VDList, FUN=plotVD) } +print("Report Clonality - Heatmaps VJ") + plotVJ <- function(dat){ if(length(dat[,1]) == 0){ return() @@ -427,7 +444,10 @@ VJList = split(completeVJ, f=completeVJ[,"Sample"]) lapply(VJList, FUN=plotVJ) + + if(useD){ + print("Report Clonality - Heatmaps DJ") plotDJ <- function(dat){ if(length(dat[,1]) == 0){ return()