changeset 18:431797cd74c8 draft

Uploaded
author davidvanzessen
date Mon, 21 Dec 2015 07:23:37 -0500
parents ee1bda8c27c8
children 672d5e010b1f
files imgt_loader/imgt_loader.py report_clonality/RScript.r
diffstat 2 files changed, 25 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/imgt_loader/imgt_loader.py	Mon Nov 09 08:28:31 2015 -0500
+++ b/imgt_loader/imgt_loader.py	Mon Dec 21 07:23:37 2015 -0500
@@ -93,7 +93,8 @@
 						r"(IGLV[0-9]-[0-9]{1,2})",
 						r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)",
 						r"(TRGV[234589])",
-						r"(TRDV[1-3])"]
+						r"(TRDV[1-3])",
+						r"(IGHV[0-9]S[0-9]+)"]
 
 dPattern = [r"(IGHD[0-9]-[0-9ab]+)",
 						r"(TRBD[12])",
--- a/report_clonality/RScript.r	Mon Nov 09 08:28:31 2015 -0500
+++ b/report_clonality/RScript.r	Mon Dec 21 07:23:37 2015 -0500
@@ -44,6 +44,8 @@
 
 # ---------------------- Data preperation ----------------------
 
+print("Report Clonality - Data preperation")
+
 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="")
 
 setwd(outdir)
@@ -114,6 +116,8 @@
 
 # ---------------------- Counting the productive/unproductive and unique sequences ----------------------
 
+print("Report Clonality - counting productive/unproductive/unique")
+
 if(!("Functionality" %in% inputdata)){ #add a functionality column to the igblast data
   inputdata$Functionality = "unproductive"
   search = (inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND")
@@ -177,6 +181,8 @@
 
 # ---------------------- Frequency calculation for V, D and J ----------------------
 
+print("Report Clonality - frequency calculation V, D and J")
+
 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")])
 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length)))
 PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
@@ -194,6 +200,8 @@
 
 # ---------------------- Setting up the gene names for the different species/loci ----------------------
 
+print("Report Clonality - getting genes for species/loci")
+
 Vchain = ""
 Dchain = ""
 Jchain = ""
@@ -233,7 +241,9 @@
   useD = FALSE
   cat("No D Genes in this species/locus")
 }
-print(paste("useD:", useD))
+print(paste(nrow(Vchain), "genes in V"))
+print(paste(nrow(Dchain), "genes in D"))
+print(paste(nrow(Jchain), "genes in J"))
 
 # ---------------------- merge with the frequency count ----------------------
 
@@ -245,6 +255,8 @@
 
 # ---------------------- Create the V, D and J frequency plots and write the data.frame for every plot to a file ----------------------
 
+print("Report Clonality - V, D and J frequency plots")
+
 pV = ggplot(PRODFV)
 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage")
@@ -285,6 +297,8 @@
 
 # ---------------------- Now the frequency plots of the V, D and J families ----------------------
 
+print("Report Clonality - V, D and J family plots")
+
 VGenes = PRODF[,c("Sample", "Top.V.Gene")]
 VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene)
 VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")])
@@ -334,6 +348,8 @@
 
 # ---------------------- Plotting the cdr3 length ----------------------
 
+print("Report Clonality - CDR3 length plot")
+
 CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length.DNA")])
 TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample])
 CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample")
@@ -350,7 +366,6 @@
 
 # ---------------------- Plot the heatmaps ----------------------
 
-
 #get the reverse order for the V and D genes
 revVchain = Vchain
 revDchain = Dchain
@@ -358,6 +373,7 @@
 revDchain$chr.orderD = rev(revDchain$chr.orderD)
 
 if(useD){
+  print("Report Clonality - Heatmaps VD")
   plotVD <- function(dat){
     if(length(dat[,1]) == 0){
       return()
@@ -389,10 +405,11 @@
   completeVD = merge(completeVD, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE)
   completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE)
   VDList = split(completeVD, f=completeVD[,"Sample"])
-  
   lapply(VDList, FUN=plotVD)
 }
 
+print("Report Clonality - Heatmaps VJ")
+
 plotVJ <- function(dat){
   if(length(dat[,1]) == 0){
     return()
@@ -427,7 +444,10 @@
 VJList = split(completeVJ, f=completeVJ[,"Sample"])
 lapply(VJList, FUN=plotVJ)
 
+
+
 if(useD){
+  print("Report Clonality - Heatmaps DJ")	
   plotDJ <- function(dat){
     if(length(dat[,1]) == 0){
       return()